Update interface to TARGET_VECTORIZE_VEC_PERM_CONST_OK
[gcc.git] gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
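/* Used by optimal_immediate_sequence (declared below) to return the
   up-to-four immediates chosen when synthesizing a constant. */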
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 \f
320 /* Table of machine attributes. */
321 static const struct attribute_spec arm_attribute_table[] =
322 {
323 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
324 affects_type_identity } */
325 /* Function calls made to this symbol must be done indirectly, because
326 it may lie outside of the 26 bit addressing range of a normal function
327 call. */
328 { "long_call", 0, 0, false, true, true, NULL, false },
329 /* Whereas these functions are always known to reside within the 26 bit
330 addressing range. */
331 { "short_call", 0, 0, false, true, true, NULL, false },
332 /* Specify the procedure call conventions for a function. */
333 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
334 false },
335 /* Interrupt Service Routines have special prologue and epilogue requirements. */
336 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
337 false },
338 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
339 false },
340 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
341 false },
342 #ifdef ARM_PE
343 /* ARM/PE has three new attributes:
344 interfacearm - ?
345 dllexport - for exporting a function/variable that will live in a dll
346 dllimport - for importing a function/variable from a dll
347
348 Microsoft allows multiple declspecs in one __declspec, separating
349 them with spaces. We do NOT support this. Instead, use __declspec
350 multiple times.
351 */
352 { "dllimport", 0, 0, true, false, false, NULL, false },
353 { "dllexport", 0, 0, true, false, false, NULL, false },
354 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
355 false },
356 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
357 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
358 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
360 false },
361 #endif
362 /* ARMv8-M Security Extensions support. */
363 { "cmse_nonsecure_entry", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_entry, false },
365 { "cmse_nonsecure_call", 0, 0, true, false, false,
366 arm_handle_cmse_nonsecure_call, true },
367 { NULL, 0, 0, false, false, false, NULL, false }
368 };
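/* For reference, user code applies these attributes with the usual GNU
   syntax; the declarations below are purely illustrative:

     void far_away (void) __attribute__ ((long_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     int entry_fn (int) __attribute__ ((cmse_nonsecure_entry));  */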
369 \f
370 /* Initialize the GCC target structure. */
371 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 #undef TARGET_MERGE_DECL_ATTRIBUTES
373 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
374 #endif
375
376 #undef TARGET_LEGITIMIZE_ADDRESS
377 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
378
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
381
382 #undef TARGET_INSERT_ATTRIBUTES
383 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
384
385 #undef TARGET_ASM_FILE_START
386 #define TARGET_ASM_FILE_START arm_file_start
387 #undef TARGET_ASM_FILE_END
388 #define TARGET_ASM_FILE_END arm_file_end
389
390 #undef TARGET_ASM_ALIGNED_SI_OP
391 #define TARGET_ASM_ALIGNED_SI_OP NULL
392 #undef TARGET_ASM_INTEGER
393 #define TARGET_ASM_INTEGER arm_assemble_integer
394
395 #undef TARGET_PRINT_OPERAND
396 #define TARGET_PRINT_OPERAND arm_print_operand
397 #undef TARGET_PRINT_OPERAND_ADDRESS
398 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
401
402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
404
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
407
408 #undef TARGET_ASM_FUNCTION_EPILOGUE
409 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
410
411 #undef TARGET_CAN_INLINE_P
412 #define TARGET_CAN_INLINE_P arm_can_inline_p
413
414 #undef TARGET_RELAYOUT_FUNCTION
415 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
416
417 #undef TARGET_OPTION_OVERRIDE
418 #define TARGET_OPTION_OVERRIDE arm_option_override
419
420 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
421 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
422
423 #undef TARGET_OPTION_SAVE
424 #define TARGET_OPTION_SAVE arm_option_save
425
426 #undef TARGET_OPTION_RESTORE
427 #define TARGET_OPTION_RESTORE arm_option_restore
428
429 #undef TARGET_OPTION_PRINT
430 #define TARGET_OPTION_PRINT arm_option_print
431
432 #undef TARGET_COMP_TYPE_ATTRIBUTES
433 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
434
435 #undef TARGET_SCHED_CAN_SPECULATE_INSN
436 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
437
438 #undef TARGET_SCHED_MACRO_FUSION_P
439 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
440
441 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
442 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
443
444 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
445 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
446
447 #undef TARGET_SCHED_ADJUST_COST
448 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
449
450 #undef TARGET_SET_CURRENT_FUNCTION
451 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
452
453 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
454 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
455
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER arm_sched_reorder
458
459 #undef TARGET_REGISTER_MOVE_COST
460 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
461
462 #undef TARGET_MEMORY_MOVE_COST
463 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
464
465 #undef TARGET_ENCODE_SECTION_INFO
466 #ifdef ARM_PE
467 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
468 #else
469 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
470 #endif
471
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
474
475 #undef TARGET_ASM_INTERNAL_LABEL
476 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
477
478 #undef TARGET_FLOATN_MODE
479 #define TARGET_FLOATN_MODE arm_floatn_mode
480
481 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
482 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
483
484 #undef TARGET_FUNCTION_VALUE
485 #define TARGET_FUNCTION_VALUE arm_function_value
486
487 #undef TARGET_LIBCALL_VALUE
488 #define TARGET_LIBCALL_VALUE arm_libcall_value
489
490 #undef TARGET_FUNCTION_VALUE_REGNO_P
491 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
492
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
497
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS arm_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST arm_address_cost
502
503 #undef TARGET_SHIFT_TRUNCATION_MASK
504 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
507 #undef TARGET_ARRAY_MODE_SUPPORTED_P
508 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
509 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
510 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
511 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
512 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
513 arm_autovectorize_vector_sizes
514
515 #undef TARGET_MACHINE_DEPENDENT_REORG
516 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
517
518 #undef TARGET_INIT_BUILTINS
519 #define TARGET_INIT_BUILTINS arm_init_builtins
520 #undef TARGET_EXPAND_BUILTIN
521 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
522 #undef TARGET_BUILTIN_DECL
523 #define TARGET_BUILTIN_DECL arm_builtin_decl
524
525 #undef TARGET_INIT_LIBFUNCS
526 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
527
528 #undef TARGET_PROMOTE_FUNCTION_MODE
529 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
530 #undef TARGET_PROMOTE_PROTOTYPES
531 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
534 #undef TARGET_ARG_PARTIAL_BYTES
535 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
536 #undef TARGET_FUNCTION_ARG
537 #define TARGET_FUNCTION_ARG arm_function_arg
538 #undef TARGET_FUNCTION_ARG_ADVANCE
539 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
540 #undef TARGET_FUNCTION_ARG_PADDING
541 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
542 #undef TARGET_FUNCTION_ARG_BOUNDARY
543 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
544
545 #undef TARGET_SETUP_INCOMING_VARARGS
546 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
547
548 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
549 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
550
551 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
552 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
553 #undef TARGET_TRAMPOLINE_INIT
554 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
555 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
556 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
557
558 #undef TARGET_WARN_FUNC_RETURN
559 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
560
561 #undef TARGET_DEFAULT_SHORT_ENUMS
562 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
563
564 #undef TARGET_ALIGN_ANON_BITFIELD
565 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
566
567 #undef TARGET_NARROW_VOLATILE_BITFIELD
568 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
569
570 #undef TARGET_CXX_GUARD_TYPE
571 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
572
573 #undef TARGET_CXX_GUARD_MASK_BIT
574 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
575
576 #undef TARGET_CXX_GET_COOKIE_SIZE
577 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
578
579 #undef TARGET_CXX_COOKIE_HAS_SIZE
580 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
581
582 #undef TARGET_CXX_CDTOR_RETURNS_THIS
583 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
584
585 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
586 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
587
588 #undef TARGET_CXX_USE_AEABI_ATEXIT
589 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
590
591 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
592 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
593 arm_cxx_determine_class_data_visibility
594
595 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
596 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
597
598 #undef TARGET_RETURN_IN_MSB
599 #define TARGET_RETURN_IN_MSB arm_return_in_msb
600
601 #undef TARGET_RETURN_IN_MEMORY
602 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
603
604 #undef TARGET_MUST_PASS_IN_STACK
605 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
606
607 #if ARM_UNWIND_INFO
608 #undef TARGET_ASM_UNWIND_EMIT
609 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
610
611 /* EABI unwinding tables use a different format for the typeinfo tables. */
612 #undef TARGET_ASM_TTYPE
613 #define TARGET_ASM_TTYPE arm_output_ttype
614
615 #undef TARGET_ARM_EABI_UNWINDER
616 #define TARGET_ARM_EABI_UNWINDER true
617
618 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
619 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
620
621 #endif /* ARM_UNWIND_INFO */
622
623 #undef TARGET_ASM_INIT_SECTIONS
624 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
625
626 #undef TARGET_DWARF_REGISTER_SPAN
627 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
628
629 #undef TARGET_CANNOT_COPY_INSN_P
630 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
631
632 #ifdef HAVE_AS_TLS
633 #undef TARGET_HAVE_TLS
634 #define TARGET_HAVE_TLS true
635 #endif
636
637 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
638 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
639
640 #undef TARGET_LEGITIMATE_CONSTANT_P
641 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
642
643 #undef TARGET_CANNOT_FORCE_CONST_MEM
644 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
645
646 #undef TARGET_MAX_ANCHOR_OFFSET
647 #define TARGET_MAX_ANCHOR_OFFSET 4095
648
649 /* The minimum is set such that the total size of the block
650 for a particular anchor is -4088 + 1 + 4095 bytes, which is
651 divisible by eight, ensuring natural spacing of anchors. */
652 #undef TARGET_MIN_ANCHOR_OFFSET
653 #define TARGET_MIN_ANCHOR_OFFSET -4088
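/* That is, anchors cover offsets in [-4088, +4095], a block of
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023. */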
654
655 #undef TARGET_SCHED_ISSUE_RATE
656 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
657
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
660 arm_first_cycle_multipass_dfa_lookahead
661
662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
664 arm_first_cycle_multipass_dfa_lookahead_guard
665
666 #undef TARGET_MANGLE_TYPE
667 #define TARGET_MANGLE_TYPE arm_mangle_type
668
669 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
670 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
671
672 #undef TARGET_BUILD_BUILTIN_VA_LIST
673 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
674 #undef TARGET_EXPAND_BUILTIN_VA_START
675 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
676 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
677 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
678
679 #ifdef HAVE_AS_TLS
680 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
681 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
682 #endif
683
684 #undef TARGET_LEGITIMATE_ADDRESS_P
685 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
686
687 #undef TARGET_PREFERRED_RELOAD_CLASS
688 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
689
690 #undef TARGET_PROMOTED_TYPE
691 #define TARGET_PROMOTED_TYPE arm_promoted_type
692
693 #undef TARGET_SCALAR_MODE_SUPPORTED_P
694 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
695
696 #undef TARGET_COMPUTE_FRAME_LAYOUT
697 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
698
699 #undef TARGET_FRAME_POINTER_REQUIRED
700 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
701
702 #undef TARGET_CAN_ELIMINATE
703 #define TARGET_CAN_ELIMINATE arm_can_eliminate
704
705 #undef TARGET_CONDITIONAL_REGISTER_USAGE
706 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
707
708 #undef TARGET_CLASS_LIKELY_SPILLED_P
709 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
710
711 #undef TARGET_VECTORIZE_BUILTINS
712 #define TARGET_VECTORIZE_BUILTINS
713
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
716 arm_builtin_vectorized_function
717
718 #undef TARGET_VECTOR_ALIGNMENT
719 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
720
721 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
722 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
723 arm_vector_alignment_reachable
724
725 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
726 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
727 arm_builtin_support_vector_misalignment
728
729 #undef TARGET_PREFERRED_RENAME_CLASS
730 #define TARGET_PREFERRED_RENAME_CLASS \
731 arm_preferred_rename_class
732
733 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
734 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
735 arm_vectorize_vec_perm_const_ok
736
737 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
738 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
739 arm_builtin_vectorization_cost
740 #undef TARGET_VECTORIZE_ADD_STMT_COST
741 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
742
743 #undef TARGET_CANONICALIZE_COMPARISON
744 #define TARGET_CANONICALIZE_COMPARISON \
745 arm_canonicalize_comparison
746
747 #undef TARGET_ASAN_SHADOW_OFFSET
748 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
749
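/* An IT instruction can predicate at most four following instructions;
   when arm_restrict_it is set (-mrestrict-it) only single-instruction
   IT blocks are generated. */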
750 #undef MAX_INSN_PER_IT_BLOCK
751 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
752
753 #undef TARGET_CAN_USE_DOLOOP_P
754 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
755
756 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
757 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
758
759 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
760 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
761
762 #undef TARGET_SCHED_FUSION_PRIORITY
763 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
764
765 #undef TARGET_ASM_FUNCTION_SECTION
766 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
767
768 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
769 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
770
771 #undef TARGET_SECTION_TYPE_FLAGS
772 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
773
774 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
775 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
776
777 #undef TARGET_C_EXCESS_PRECISION
778 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
779
780 /* Although the architecture reserves bits 0 and 1, only the former is
781 used for ARM/Thumb ISA selection in v7 and earlier versions. */
782 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
783 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
784
785 #undef TARGET_FIXED_CONDITION_CODE_REGS
786 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
787
788 #undef TARGET_HARD_REGNO_NREGS
789 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
790 #undef TARGET_HARD_REGNO_MODE_OK
791 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
792
793 #undef TARGET_MODES_TIEABLE_P
794 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
795
796 #undef TARGET_CAN_CHANGE_MODE_CLASS
797 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
798 \f
799 /* Obstack for minipool constant handling. */
800 static struct obstack minipool_obstack;
801 static char * minipool_startobj;
802
803 /* The maximum number of insns skipped which
804 will be conditionalised if possible. */
805 static int max_insns_skipped = 5;
806
807 extern FILE * asm_out_file;
808
809 /* True if we are currently building a constant table. */
810 int making_const_table;
811
812 /* The processor for which instructions should be scheduled. */
813 enum processor_type arm_tune = TARGET_CPU_arm_none;
814
815 /* The current tuning set. */
816 const struct tune_params *current_tune;
817
818 /* Which floating point hardware to schedule for. */
819 int arm_fpu_attr;
820
821 /* Used for Thumb call_via trampolines. */
822 rtx thumb_call_via_label[14];
823 static int thumb_call_reg_needed;
824
825 /* The bits in this mask specify which instruction scheduling options should
826 be used. */
827 unsigned int tune_flags = 0;
828
829 /* The highest ARM architecture version supported by the
830 target. */
831 enum base_architecture arm_base_arch = BASE_ARCH_0;
832
833 /* Active target architecture and tuning. */
834
835 struct arm_build_target arm_active_target;
836
837 /* The following are used in the arm.md file as equivalents to bits
838 in the above two flag variables. */
839
840 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
841 int arm_arch3m = 0;
842
843 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
844 int arm_arch4 = 0;
845
846 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
847 int arm_arch4t = 0;
848
849 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
850 int arm_arch5 = 0;
851
852 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
853 int arm_arch5e = 0;
854
855 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
856 int arm_arch5te = 0;
857
858 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
859 int arm_arch6 = 0;
860
861 /* Nonzero if this chip supports the ARM 6K extensions. */
862 int arm_arch6k = 0;
863
864 /* Nonzero if this chip supports the ARM 6KZ extensions. */
865 int arm_arch6kz = 0;
866
867 /* Nonzero if instructions present in ARMv6-M can be used. */
868 int arm_arch6m = 0;
869
870 /* Nonzero if this chip supports the ARM 7 extensions. */
871 int arm_arch7 = 0;
872
873 /* Nonzero if this chip supports the Large Physical Address Extension. */
874 int arm_arch_lpae = 0;
875
876 /* Nonzero if instructions not present in the 'M' profile can be used. */
877 int arm_arch_notm = 0;
878
879 /* Nonzero if instructions present in ARMv7E-M can be used. */
880 int arm_arch7em = 0;
881
882 /* Nonzero if instructions present in ARMv8 can be used. */
883 int arm_arch8 = 0;
884
885 /* Nonzero if this chip supports the ARMv8.1 extensions. */
886 int arm_arch8_1 = 0;
887
888 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
889 int arm_arch8_2 = 0;
890
891 /* Nonzero if this chip supports the FP16 instructions extension of ARM
892 Architecture 8.2. */
893 int arm_fp16_inst = 0;
894
895 /* Nonzero if this chip can benefit from load scheduling. */
896 int arm_ld_sched = 0;
897
898 /* Nonzero if this chip is a StrongARM. */
899 int arm_tune_strongarm = 0;
900
901 /* Nonzero if this chip supports Intel Wireless MMX technology. */
902 int arm_arch_iwmmxt = 0;
903
904 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
905 int arm_arch_iwmmxt2 = 0;
906
907 /* Nonzero if this chip is an XScale. */
908 int arm_arch_xscale = 0;
909
910 /* Nonzero if tuning for XScale */
911 int arm_tune_xscale = 0;
912
913 /* Nonzero if we want to tune for stores that access the write-buffer.
914 This typically means an ARM6 or ARM7 with MMU or MPU. */
915 int arm_tune_wbuf = 0;
916
917 /* Nonzero if tuning for Cortex-A9. */
918 int arm_tune_cortex_a9 = 0;
919
920 /* Nonzero if we should define __THUMB_INTERWORK__ in the
921 preprocessor.
922 XXX This is a bit of a hack, it's intended to help work around
923 problems in GLD which doesn't understand that armv5t code is
924 interworking clean. */
925 int arm_cpp_interwork = 0;
926
927 /* Nonzero if chip supports Thumb 1. */
928 int arm_arch_thumb1;
929
930 /* Nonzero if chip supports Thumb 2. */
931 int arm_arch_thumb2;
932
933 /* Nonzero if chip supports integer division instruction. */
934 int arm_arch_arm_hwdiv;
935 int arm_arch_thumb_hwdiv;
936
937 /* Nonzero if chip disallows volatile memory access in IT block. */
938 int arm_arch_no_volatile_ce;
939
940 /* Nonzero if we should use Neon to handle 64-bit operations rather
941 than core registers. */
942 int prefer_neon_for_64bits = 0;
943
944 /* Nonzero if we shouldn't use literal pools. */
945 bool arm_disable_literal_pool = false;
946
947 /* The register number to be used for the PIC offset register. */
948 unsigned arm_pic_register = INVALID_REGNUM;
949
950 enum arm_pcs arm_pcs_default;
951
952 /* For an explanation of these variables, see final_prescan_insn below. */
953 int arm_ccfsm_state;
954 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
955 enum arm_cond_code arm_current_cc;
956
957 rtx arm_target_insn;
958 int arm_target_label;
959 /* The number of conditionally executed insns, including the current insn. */
960 int arm_condexec_count = 0;
961 /* A bitmask specifying the patterns for the IT block.
962 Zero means do not output an IT block before this insn. */
963 int arm_condexec_mask = 0;
964 /* The number of bits used in arm_condexec_mask. */
965 int arm_condexec_masklen = 0;
966
967 /* Nonzero if chip supports the ARMv8 CRC instructions. */
968 int arm_arch_crc = 0;
969
970 /* Nonzero if chip supports the ARMv8-M security extensions. */
971 int arm_arch_cmse = 0;
972
973 /* Nonzero if the core has a very small, high-latency, multiply unit. */
974 int arm_m_profile_small_mul = 0;
975
976 /* The condition codes of the ARM, and the inverse function. */
977 static const char * const arm_condition_codes[] =
978 {
979 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
980 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
981 };
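/* Codes at even indices are inverted by the code that follows them
   (eq/ne, cs/cc, ..., gt/le), so the inverse of condition N is N ^ 1. */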
982
983 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
984 int arm_regs_in_sequence[] =
985 {
986 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
987 };
988
989 #define ARM_LSL_NAME "lsl"
990 #define streq(string1, string2) (strcmp (string1, string2) == 0)
991
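/* The low registers (r0-r7) usable as work registers in Thumb-2 code, with
   the hard frame pointer, stack pointer, program counter and PIC register
   masked out. */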
992 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
993 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
994 | (1 << PIC_OFFSET_TABLE_REGNUM)))
995 \f
996 /* Initialization code. */
997
998 struct cpu_tune
999 {
1000 enum processor_type scheduler;
1001 unsigned int tune_flags;
1002 const struct tune_params *tune;
1003 };
1004
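/* Prefetch tuning parameters: number of prefetch slots, L1 cache size and
   L1 cache line size.  ARM_PREFETCH_NOT_BENEFICIAL marks cores for which
   software prefetching is not considered worthwhile. */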
1005 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1006 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1007 { \
1008 num_slots, \
1009 l1_size, \
1010 l1_line_size \
1011 }
1012
1013 /* arm generic vectorizer costs. */
1014 static const
1015 struct cpu_vec_costs arm_default_vec_cost = {
1016 1, /* scalar_stmt_cost. */
1017 1, /* scalar_load_cost. */
1018 1, /* scalar_store_cost. */
1019 1, /* vec_stmt_cost. */
1020 1, /* vec_to_scalar_cost. */
1021 1, /* scalar_to_vec_cost. */
1022 1, /* vec_align_load_cost. */
1023 1, /* vec_unalign_load_cost. */
1024 1, /* vec_unalign_store_cost. */
1025 1, /* vec_store_cost. */
1026 3, /* cond_taken_branch_cost. */
1027 1, /* cond_not_taken_branch_cost. */
1028 };
1029
1030 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1031 #include "aarch-cost-tables.h"
1032
1033
1034
1035 const struct cpu_cost_table cortexa9_extra_costs =
1036 {
1037 /* ALU */
1038 {
1039 0, /* arith. */
1040 0, /* logical. */
1041 0, /* shift. */
1042 COSTS_N_INSNS (1), /* shift_reg. */
1043 COSTS_N_INSNS (1), /* arith_shift. */
1044 COSTS_N_INSNS (2), /* arith_shift_reg. */
1045 0, /* log_shift. */
1046 COSTS_N_INSNS (1), /* log_shift_reg. */
1047 COSTS_N_INSNS (1), /* extend. */
1048 COSTS_N_INSNS (2), /* extend_arith. */
1049 COSTS_N_INSNS (1), /* bfi. */
1050 COSTS_N_INSNS (1), /* bfx. */
1051 0, /* clz. */
1052 0, /* rev. */
1053 0, /* non_exec. */
1054 true /* non_exec_costs_exec. */
1055 },
1056 {
1057 /* MULT SImode */
1058 {
1059 COSTS_N_INSNS (3), /* simple. */
1060 COSTS_N_INSNS (3), /* flag_setting. */
1061 COSTS_N_INSNS (2), /* extend. */
1062 COSTS_N_INSNS (3), /* add. */
1063 COSTS_N_INSNS (2), /* extend_add. */
1064 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1065 },
1066 /* MULT DImode */
1067 {
1068 0, /* simple (N/A). */
1069 0, /* flag_setting (N/A). */
1070 COSTS_N_INSNS (4), /* extend. */
1071 0, /* add (N/A). */
1072 COSTS_N_INSNS (4), /* extend_add. */
1073 0 /* idiv (N/A). */
1074 }
1075 },
1076 /* LD/ST */
1077 {
1078 COSTS_N_INSNS (2), /* load. */
1079 COSTS_N_INSNS (2), /* load_sign_extend. */
1080 COSTS_N_INSNS (2), /* ldrd. */
1081 COSTS_N_INSNS (2), /* ldm_1st. */
1082 1, /* ldm_regs_per_insn_1st. */
1083 2, /* ldm_regs_per_insn_subsequent. */
1084 COSTS_N_INSNS (5), /* loadf. */
1085 COSTS_N_INSNS (5), /* loadd. */
1086 COSTS_N_INSNS (1), /* load_unaligned. */
1087 COSTS_N_INSNS (2), /* store. */
1088 COSTS_N_INSNS (2), /* strd. */
1089 COSTS_N_INSNS (2), /* stm_1st. */
1090 1, /* stm_regs_per_insn_1st. */
1091 2, /* stm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* storef. */
1093 COSTS_N_INSNS (1), /* stored. */
1094 COSTS_N_INSNS (1), /* store_unaligned. */
1095 COSTS_N_INSNS (1), /* loadv. */
1096 COSTS_N_INSNS (1) /* storev. */
1097 },
1098 {
1099 /* FP SFmode */
1100 {
1101 COSTS_N_INSNS (14), /* div. */
1102 COSTS_N_INSNS (4), /* mult. */
1103 COSTS_N_INSNS (7), /* mult_addsub. */
1104 COSTS_N_INSNS (30), /* fma. */
1105 COSTS_N_INSNS (3), /* addsub. */
1106 COSTS_N_INSNS (1), /* fpconst. */
1107 COSTS_N_INSNS (1), /* neg. */
1108 COSTS_N_INSNS (3), /* compare. */
1109 COSTS_N_INSNS (3), /* widen. */
1110 COSTS_N_INSNS (3), /* narrow. */
1111 COSTS_N_INSNS (3), /* toint. */
1112 COSTS_N_INSNS (3), /* fromint. */
1113 COSTS_N_INSNS (3) /* roundint. */
1114 },
1115 /* FP DFmode */
1116 {
1117 COSTS_N_INSNS (24), /* div. */
1118 COSTS_N_INSNS (5), /* mult. */
1119 COSTS_N_INSNS (8), /* mult_addsub. */
1120 COSTS_N_INSNS (30), /* fma. */
1121 COSTS_N_INSNS (3), /* addsub. */
1122 COSTS_N_INSNS (1), /* fpconst. */
1123 COSTS_N_INSNS (1), /* neg. */
1124 COSTS_N_INSNS (3), /* compare. */
1125 COSTS_N_INSNS (3), /* widen. */
1126 COSTS_N_INSNS (3), /* narrow. */
1127 COSTS_N_INSNS (3), /* toint. */
1128 COSTS_N_INSNS (3), /* fromint. */
1129 COSTS_N_INSNS (3) /* roundint. */
1130 }
1131 },
1132 /* Vector */
1133 {
1134 COSTS_N_INSNS (1) /* alu. */
1135 }
1136 };
1137
1138 const struct cpu_cost_table cortexa8_extra_costs =
1139 {
1140 /* ALU */
1141 {
1142 0, /* arith. */
1143 0, /* logical. */
1144 COSTS_N_INSNS (1), /* shift. */
1145 0, /* shift_reg. */
1146 COSTS_N_INSNS (1), /* arith_shift. */
1147 0, /* arith_shift_reg. */
1148 COSTS_N_INSNS (1), /* log_shift. */
1149 0, /* log_shift_reg. */
1150 0, /* extend. */
1151 0, /* extend_arith. */
1152 0, /* bfi. */
1153 0, /* bfx. */
1154 0, /* clz. */
1155 0, /* rev. */
1156 0, /* non_exec. */
1157 true /* non_exec_costs_exec. */
1158 },
1159 {
1160 /* MULT SImode */
1161 {
1162 COSTS_N_INSNS (1), /* simple. */
1163 COSTS_N_INSNS (1), /* flag_setting. */
1164 COSTS_N_INSNS (1), /* extend. */
1165 COSTS_N_INSNS (1), /* add. */
1166 COSTS_N_INSNS (1), /* extend_add. */
1167 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1168 },
1169 /* MULT DImode */
1170 {
1171 0, /* simple (N/A). */
1172 0, /* flag_setting (N/A). */
1173 COSTS_N_INSNS (2), /* extend. */
1174 0, /* add (N/A). */
1175 COSTS_N_INSNS (2), /* extend_add. */
1176 0 /* idiv (N/A). */
1177 }
1178 },
1179 /* LD/ST */
1180 {
1181 COSTS_N_INSNS (1), /* load. */
1182 COSTS_N_INSNS (1), /* load_sign_extend. */
1183 COSTS_N_INSNS (1), /* ldrd. */
1184 COSTS_N_INSNS (1), /* ldm_1st. */
1185 1, /* ldm_regs_per_insn_1st. */
1186 2, /* ldm_regs_per_insn_subsequent. */
1187 COSTS_N_INSNS (1), /* loadf. */
1188 COSTS_N_INSNS (1), /* loadd. */
1189 COSTS_N_INSNS (1), /* load_unaligned. */
1190 COSTS_N_INSNS (1), /* store. */
1191 COSTS_N_INSNS (1), /* strd. */
1192 COSTS_N_INSNS (1), /* stm_1st. */
1193 1, /* stm_regs_per_insn_1st. */
1194 2, /* stm_regs_per_insn_subsequent. */
1195 COSTS_N_INSNS (1), /* storef. */
1196 COSTS_N_INSNS (1), /* stored. */
1197 COSTS_N_INSNS (1), /* store_unaligned. */
1198 COSTS_N_INSNS (1), /* loadv. */
1199 COSTS_N_INSNS (1) /* storev. */
1200 },
1201 {
1202 /* FP SFmode */
1203 {
1204 COSTS_N_INSNS (36), /* div. */
1205 COSTS_N_INSNS (11), /* mult. */
1206 COSTS_N_INSNS (20), /* mult_addsub. */
1207 COSTS_N_INSNS (30), /* fma. */
1208 COSTS_N_INSNS (9), /* addsub. */
1209 COSTS_N_INSNS (3), /* fpconst. */
1210 COSTS_N_INSNS (3), /* neg. */
1211 COSTS_N_INSNS (6), /* compare. */
1212 COSTS_N_INSNS (4), /* widen. */
1213 COSTS_N_INSNS (4), /* narrow. */
1214 COSTS_N_INSNS (8), /* toint. */
1215 COSTS_N_INSNS (8), /* fromint. */
1216 COSTS_N_INSNS (8) /* roundint. */
1217 },
1218 /* FP DFmode */
1219 {
1220 COSTS_N_INSNS (64), /* div. */
1221 COSTS_N_INSNS (16), /* mult. */
1222 COSTS_N_INSNS (25), /* mult_addsub. */
1223 COSTS_N_INSNS (30), /* fma. */
1224 COSTS_N_INSNS (9), /* addsub. */
1225 COSTS_N_INSNS (3), /* fpconst. */
1226 COSTS_N_INSNS (3), /* neg. */
1227 COSTS_N_INSNS (6), /* compare. */
1228 COSTS_N_INSNS (6), /* widen. */
1229 COSTS_N_INSNS (6), /* narrow. */
1230 COSTS_N_INSNS (8), /* toint. */
1231 COSTS_N_INSNS (8), /* fromint. */
1232 COSTS_N_INSNS (8) /* roundint. */
1233 }
1234 },
1235 /* Vector */
1236 {
1237 COSTS_N_INSNS (1) /* alu. */
1238 }
1239 };
1240
1241 const struct cpu_cost_table cortexa5_extra_costs =
1242 {
1243 /* ALU */
1244 {
1245 0, /* arith. */
1246 0, /* logical. */
1247 COSTS_N_INSNS (1), /* shift. */
1248 COSTS_N_INSNS (1), /* shift_reg. */
1249 COSTS_N_INSNS (1), /* arith_shift. */
1250 COSTS_N_INSNS (1), /* arith_shift_reg. */
1251 COSTS_N_INSNS (1), /* log_shift. */
1252 COSTS_N_INSNS (1), /* log_shift_reg. */
1253 COSTS_N_INSNS (1), /* extend. */
1254 COSTS_N_INSNS (1), /* extend_arith. */
1255 COSTS_N_INSNS (1), /* bfi. */
1256 COSTS_N_INSNS (1), /* bfx. */
1257 COSTS_N_INSNS (1), /* clz. */
1258 COSTS_N_INSNS (1), /* rev. */
1259 0, /* non_exec. */
1260 true /* non_exec_costs_exec. */
1261 },
1262
1263 {
1264 /* MULT SImode */
1265 {
1266 0, /* simple. */
1267 COSTS_N_INSNS (1), /* flag_setting. */
1268 COSTS_N_INSNS (1), /* extend. */
1269 COSTS_N_INSNS (1), /* add. */
1270 COSTS_N_INSNS (1), /* extend_add. */
1271 COSTS_N_INSNS (7) /* idiv. */
1272 },
1273 /* MULT DImode */
1274 {
1275 0, /* simple (N/A). */
1276 0, /* flag_setting (N/A). */
1277 COSTS_N_INSNS (1), /* extend. */
1278 0, /* add. */
1279 COSTS_N_INSNS (2), /* extend_add. */
1280 0 /* idiv (N/A). */
1281 }
1282 },
1283 /* LD/ST */
1284 {
1285 COSTS_N_INSNS (1), /* load. */
1286 COSTS_N_INSNS (1), /* load_sign_extend. */
1287 COSTS_N_INSNS (6), /* ldrd. */
1288 COSTS_N_INSNS (1), /* ldm_1st. */
1289 1, /* ldm_regs_per_insn_1st. */
1290 2, /* ldm_regs_per_insn_subsequent. */
1291 COSTS_N_INSNS (2), /* loadf. */
1292 COSTS_N_INSNS (4), /* loadd. */
1293 COSTS_N_INSNS (1), /* load_unaligned. */
1294 COSTS_N_INSNS (1), /* store. */
1295 COSTS_N_INSNS (3), /* strd. */
1296 COSTS_N_INSNS (1), /* stm_1st. */
1297 1, /* stm_regs_per_insn_1st. */
1298 2, /* stm_regs_per_insn_subsequent. */
1299 COSTS_N_INSNS (2), /* storef. */
1300 COSTS_N_INSNS (2), /* stored. */
1301 COSTS_N_INSNS (1), /* store_unaligned. */
1302 COSTS_N_INSNS (1), /* loadv. */
1303 COSTS_N_INSNS (1) /* storev. */
1304 },
1305 {
1306 /* FP SFmode */
1307 {
1308 COSTS_N_INSNS (15), /* div. */
1309 COSTS_N_INSNS (3), /* mult. */
1310 COSTS_N_INSNS (7), /* mult_addsub. */
1311 COSTS_N_INSNS (7), /* fma. */
1312 COSTS_N_INSNS (3), /* addsub. */
1313 COSTS_N_INSNS (3), /* fpconst. */
1314 COSTS_N_INSNS (3), /* neg. */
1315 COSTS_N_INSNS (3), /* compare. */
1316 COSTS_N_INSNS (3), /* widen. */
1317 COSTS_N_INSNS (3), /* narrow. */
1318 COSTS_N_INSNS (3), /* toint. */
1319 COSTS_N_INSNS (3), /* fromint. */
1320 COSTS_N_INSNS (3) /* roundint. */
1321 },
1322 /* FP DFmode */
1323 {
1324 COSTS_N_INSNS (30), /* div. */
1325 COSTS_N_INSNS (6), /* mult. */
1326 COSTS_N_INSNS (10), /* mult_addsub. */
1327 COSTS_N_INSNS (7), /* fma. */
1328 COSTS_N_INSNS (3), /* addsub. */
1329 COSTS_N_INSNS (3), /* fpconst. */
1330 COSTS_N_INSNS (3), /* neg. */
1331 COSTS_N_INSNS (3), /* compare. */
1332 COSTS_N_INSNS (3), /* widen. */
1333 COSTS_N_INSNS (3), /* narrow. */
1334 COSTS_N_INSNS (3), /* toint. */
1335 COSTS_N_INSNS (3), /* fromint. */
1336 COSTS_N_INSNS (3) /* roundint. */
1337 }
1338 },
1339 /* Vector */
1340 {
1341 COSTS_N_INSNS (1) /* alu. */
1342 }
1343 };
1344
1345
1346 const struct cpu_cost_table cortexa7_extra_costs =
1347 {
1348 /* ALU */
1349 {
1350 0, /* arith. */
1351 0, /* logical. */
1352 COSTS_N_INSNS (1), /* shift. */
1353 COSTS_N_INSNS (1), /* shift_reg. */
1354 COSTS_N_INSNS (1), /* arith_shift. */
1355 COSTS_N_INSNS (1), /* arith_shift_reg. */
1356 COSTS_N_INSNS (1), /* log_shift. */
1357 COSTS_N_INSNS (1), /* log_shift_reg. */
1358 COSTS_N_INSNS (1), /* extend. */
1359 COSTS_N_INSNS (1), /* extend_arith. */
1360 COSTS_N_INSNS (1), /* bfi. */
1361 COSTS_N_INSNS (1), /* bfx. */
1362 COSTS_N_INSNS (1), /* clz. */
1363 COSTS_N_INSNS (1), /* rev. */
1364 0, /* non_exec. */
1365 true /* non_exec_costs_exec. */
1366 },
1367
1368 {
1369 /* MULT SImode */
1370 {
1371 0, /* simple. */
1372 COSTS_N_INSNS (1), /* flag_setting. */
1373 COSTS_N_INSNS (1), /* extend. */
1374 COSTS_N_INSNS (1), /* add. */
1375 COSTS_N_INSNS (1), /* extend_add. */
1376 COSTS_N_INSNS (7) /* idiv. */
1377 },
1378 /* MULT DImode */
1379 {
1380 0, /* simple (N/A). */
1381 0, /* flag_setting (N/A). */
1382 COSTS_N_INSNS (1), /* extend. */
1383 0, /* add. */
1384 COSTS_N_INSNS (2), /* extend_add. */
1385 0 /* idiv (N/A). */
1386 }
1387 },
1388 /* LD/ST */
1389 {
1390 COSTS_N_INSNS (1), /* load. */
1391 COSTS_N_INSNS (1), /* load_sign_extend. */
1392 COSTS_N_INSNS (3), /* ldrd. */
1393 COSTS_N_INSNS (1), /* ldm_1st. */
1394 1, /* ldm_regs_per_insn_1st. */
1395 2, /* ldm_regs_per_insn_subsequent. */
1396 COSTS_N_INSNS (2), /* loadf. */
1397 COSTS_N_INSNS (2), /* loadd. */
1398 COSTS_N_INSNS (1), /* load_unaligned. */
1399 COSTS_N_INSNS (1), /* store. */
1400 COSTS_N_INSNS (3), /* strd. */
1401 COSTS_N_INSNS (1), /* stm_1st. */
1402 1, /* stm_regs_per_insn_1st. */
1403 2, /* stm_regs_per_insn_subsequent. */
1404 COSTS_N_INSNS (2), /* storef. */
1405 COSTS_N_INSNS (2), /* stored. */
1406 COSTS_N_INSNS (1), /* store_unaligned. */
1407 COSTS_N_INSNS (1), /* loadv. */
1408 COSTS_N_INSNS (1) /* storev. */
1409 },
1410 {
1411 /* FP SFmode */
1412 {
1413 COSTS_N_INSNS (15), /* div. */
1414 COSTS_N_INSNS (3), /* mult. */
1415 COSTS_N_INSNS (7), /* mult_addsub. */
1416 COSTS_N_INSNS (7), /* fma. */
1417 COSTS_N_INSNS (3), /* addsub. */
1418 COSTS_N_INSNS (3), /* fpconst. */
1419 COSTS_N_INSNS (3), /* neg. */
1420 COSTS_N_INSNS (3), /* compare. */
1421 COSTS_N_INSNS (3), /* widen. */
1422 COSTS_N_INSNS (3), /* narrow. */
1423 COSTS_N_INSNS (3), /* toint. */
1424 COSTS_N_INSNS (3), /* fromint. */
1425 COSTS_N_INSNS (3) /* roundint. */
1426 },
1427 /* FP DFmode */
1428 {
1429 COSTS_N_INSNS (30), /* div. */
1430 COSTS_N_INSNS (6), /* mult. */
1431 COSTS_N_INSNS (10), /* mult_addsub. */
1432 COSTS_N_INSNS (7), /* fma. */
1433 COSTS_N_INSNS (3), /* addsub. */
1434 COSTS_N_INSNS (3), /* fpconst. */
1435 COSTS_N_INSNS (3), /* neg. */
1436 COSTS_N_INSNS (3), /* compare. */
1437 COSTS_N_INSNS (3), /* widen. */
1438 COSTS_N_INSNS (3), /* narrow. */
1439 COSTS_N_INSNS (3), /* toint. */
1440 COSTS_N_INSNS (3), /* fromint. */
1441 COSTS_N_INSNS (3) /* roundint. */
1442 }
1443 },
1444 /* Vector */
1445 {
1446 COSTS_N_INSNS (1) /* alu. */
1447 }
1448 };
1449
1450 const struct cpu_cost_table cortexa12_extra_costs =
1451 {
1452 /* ALU */
1453 {
1454 0, /* arith. */
1455 0, /* logical. */
1456 0, /* shift. */
1457 COSTS_N_INSNS (1), /* shift_reg. */
1458 COSTS_N_INSNS (1), /* arith_shift. */
1459 COSTS_N_INSNS (1), /* arith_shift_reg. */
1460 COSTS_N_INSNS (1), /* log_shift. */
1461 COSTS_N_INSNS (1), /* log_shift_reg. */
1462 0, /* extend. */
1463 COSTS_N_INSNS (1), /* extend_arith. */
1464 0, /* bfi. */
1465 COSTS_N_INSNS (1), /* bfx. */
1466 COSTS_N_INSNS (1), /* clz. */
1467 COSTS_N_INSNS (1), /* rev. */
1468 0, /* non_exec. */
1469 true /* non_exec_costs_exec. */
1470 },
1471 /* MULT SImode */
1472 {
1473 {
1474 COSTS_N_INSNS (2), /* simple. */
1475 COSTS_N_INSNS (3), /* flag_setting. */
1476 COSTS_N_INSNS (2), /* extend. */
1477 COSTS_N_INSNS (3), /* add. */
1478 COSTS_N_INSNS (2), /* extend_add. */
1479 COSTS_N_INSNS (18) /* idiv. */
1480 },
1481 /* MULT DImode */
1482 {
1483 0, /* simple (N/A). */
1484 0, /* flag_setting (N/A). */
1485 COSTS_N_INSNS (3), /* extend. */
1486 0, /* add (N/A). */
1487 COSTS_N_INSNS (3), /* extend_add. */
1488 0 /* idiv (N/A). */
1489 }
1490 },
1491 /* LD/ST */
1492 {
1493 COSTS_N_INSNS (3), /* load. */
1494 COSTS_N_INSNS (3), /* load_sign_extend. */
1495 COSTS_N_INSNS (3), /* ldrd. */
1496 COSTS_N_INSNS (3), /* ldm_1st. */
1497 1, /* ldm_regs_per_insn_1st. */
1498 2, /* ldm_regs_per_insn_subsequent. */
1499 COSTS_N_INSNS (3), /* loadf. */
1500 COSTS_N_INSNS (3), /* loadd. */
1501 0, /* load_unaligned. */
1502 0, /* store. */
1503 0, /* strd. */
1504 0, /* stm_1st. */
1505 1, /* stm_regs_per_insn_1st. */
1506 2, /* stm_regs_per_insn_subsequent. */
1507 COSTS_N_INSNS (2), /* storef. */
1508 COSTS_N_INSNS (2), /* stored. */
1509 0, /* store_unaligned. */
1510 COSTS_N_INSNS (1), /* loadv. */
1511 COSTS_N_INSNS (1) /* storev. */
1512 },
1513 {
1514 /* FP SFmode */
1515 {
1516 COSTS_N_INSNS (17), /* div. */
1517 COSTS_N_INSNS (4), /* mult. */
1518 COSTS_N_INSNS (8), /* mult_addsub. */
1519 COSTS_N_INSNS (8), /* fma. */
1520 COSTS_N_INSNS (4), /* addsub. */
1521 COSTS_N_INSNS (2), /* fpconst. */
1522 COSTS_N_INSNS (2), /* neg. */
1523 COSTS_N_INSNS (2), /* compare. */
1524 COSTS_N_INSNS (4), /* widen. */
1525 COSTS_N_INSNS (4), /* narrow. */
1526 COSTS_N_INSNS (4), /* toint. */
1527 COSTS_N_INSNS (4), /* fromint. */
1528 COSTS_N_INSNS (4) /* roundint. */
1529 },
1530 /* FP DFmode */
1531 {
1532 COSTS_N_INSNS (31), /* div. */
1533 COSTS_N_INSNS (4), /* mult. */
1534 COSTS_N_INSNS (8), /* mult_addsub. */
1535 COSTS_N_INSNS (8), /* fma. */
1536 COSTS_N_INSNS (4), /* addsub. */
1537 COSTS_N_INSNS (2), /* fpconst. */
1538 COSTS_N_INSNS (2), /* neg. */
1539 COSTS_N_INSNS (2), /* compare. */
1540 COSTS_N_INSNS (4), /* widen. */
1541 COSTS_N_INSNS (4), /* narrow. */
1542 COSTS_N_INSNS (4), /* toint. */
1543 COSTS_N_INSNS (4), /* fromint. */
1544 COSTS_N_INSNS (4) /* roundint. */
1545 }
1546 },
1547 /* Vector */
1548 {
1549 COSTS_N_INSNS (1) /* alu. */
1550 }
1551 };
1552
1553 const struct cpu_cost_table cortexa15_extra_costs =
1554 {
1555 /* ALU */
1556 {
1557 0, /* arith. */
1558 0, /* logical. */
1559 0, /* shift. */
1560 0, /* shift_reg. */
1561 COSTS_N_INSNS (1), /* arith_shift. */
1562 COSTS_N_INSNS (1), /* arith_shift_reg. */
1563 COSTS_N_INSNS (1), /* log_shift. */
1564 COSTS_N_INSNS (1), /* log_shift_reg. */
1565 0, /* extend. */
1566 COSTS_N_INSNS (1), /* extend_arith. */
1567 COSTS_N_INSNS (1), /* bfi. */
1568 0, /* bfx. */
1569 0, /* clz. */
1570 0, /* rev. */
1571 0, /* non_exec. */
1572 true /* non_exec_costs_exec. */
1573 },
1574 /* MULT SImode */
1575 {
1576 {
1577 COSTS_N_INSNS (2), /* simple. */
1578 COSTS_N_INSNS (3), /* flag_setting. */
1579 COSTS_N_INSNS (2), /* extend. */
1580 COSTS_N_INSNS (2), /* add. */
1581 COSTS_N_INSNS (2), /* extend_add. */
1582 COSTS_N_INSNS (18) /* idiv. */
1583 },
1584 /* MULT DImode */
1585 {
1586 0, /* simple (N/A). */
1587 0, /* flag_setting (N/A). */
1588 COSTS_N_INSNS (3), /* extend. */
1589 0, /* add (N/A). */
1590 COSTS_N_INSNS (3), /* extend_add. */
1591 0 /* idiv (N/A). */
1592 }
1593 },
1594 /* LD/ST */
1595 {
1596 COSTS_N_INSNS (3), /* load. */
1597 COSTS_N_INSNS (3), /* load_sign_extend. */
1598 COSTS_N_INSNS (3), /* ldrd. */
1599 COSTS_N_INSNS (4), /* ldm_1st. */
1600 1, /* ldm_regs_per_insn_1st. */
1601 2, /* ldm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (4), /* loadf. */
1603 COSTS_N_INSNS (4), /* loadd. */
1604 0, /* load_unaligned. */
1605 0, /* store. */
1606 0, /* strd. */
1607 COSTS_N_INSNS (1), /* stm_1st. */
1608 1, /* stm_regs_per_insn_1st. */
1609 2, /* stm_regs_per_insn_subsequent. */
1610 0, /* storef. */
1611 0, /* stored. */
1612 0, /* store_unaligned. */
1613 COSTS_N_INSNS (1), /* loadv. */
1614 COSTS_N_INSNS (1) /* storev. */
1615 },
1616 {
1617 /* FP SFmode */
1618 {
1619 COSTS_N_INSNS (17), /* div. */
1620 COSTS_N_INSNS (4), /* mult. */
1621 COSTS_N_INSNS (8), /* mult_addsub. */
1622 COSTS_N_INSNS (8), /* fma. */
1623 COSTS_N_INSNS (4), /* addsub. */
1624 COSTS_N_INSNS (2), /* fpconst. */
1625 COSTS_N_INSNS (2), /* neg. */
1626 COSTS_N_INSNS (5), /* compare. */
1627 COSTS_N_INSNS (4), /* widen. */
1628 COSTS_N_INSNS (4), /* narrow. */
1629 COSTS_N_INSNS (4), /* toint. */
1630 COSTS_N_INSNS (4), /* fromint. */
1631 COSTS_N_INSNS (4) /* roundint. */
1632 },
1633 /* FP DFmode */
1634 {
1635 COSTS_N_INSNS (31), /* div. */
1636 COSTS_N_INSNS (4), /* mult. */
1637 COSTS_N_INSNS (8), /* mult_addsub. */
1638 COSTS_N_INSNS (8), /* fma. */
1639 COSTS_N_INSNS (4), /* addsub. */
1640 COSTS_N_INSNS (2), /* fpconst. */
1641 COSTS_N_INSNS (2), /* neg. */
1642 COSTS_N_INSNS (2), /* compare. */
1643 COSTS_N_INSNS (4), /* widen. */
1644 COSTS_N_INSNS (4), /* narrow. */
1645 COSTS_N_INSNS (4), /* toint. */
1646 COSTS_N_INSNS (4), /* fromint. */
1647 COSTS_N_INSNS (4) /* roundint. */
1648 }
1649 },
1650 /* Vector */
1651 {
1652 COSTS_N_INSNS (1) /* alu. */
1653 }
1654 };
1655
1656 const struct cpu_cost_table v7m_extra_costs =
1657 {
1658 /* ALU */
1659 {
1660 0, /* arith. */
1661 0, /* logical. */
1662 0, /* shift. */
1663 0, /* shift_reg. */
1664 0, /* arith_shift. */
1665 COSTS_N_INSNS (1), /* arith_shift_reg. */
1666 0, /* log_shift. */
1667 COSTS_N_INSNS (1), /* log_shift_reg. */
1668 0, /* extend. */
1669 COSTS_N_INSNS (1), /* extend_arith. */
1670 0, /* bfi. */
1671 0, /* bfx. */
1672 0, /* clz. */
1673 0, /* rev. */
1674 COSTS_N_INSNS (1), /* non_exec. */
1675 false /* non_exec_costs_exec. */
1676 },
1677 {
1678 /* MULT SImode */
1679 {
1680 COSTS_N_INSNS (1), /* simple. */
1681 COSTS_N_INSNS (1), /* flag_setting. */
1682 COSTS_N_INSNS (2), /* extend. */
1683 COSTS_N_INSNS (1), /* add. */
1684 COSTS_N_INSNS (3), /* extend_add. */
1685 COSTS_N_INSNS (8) /* idiv. */
1686 },
1687 /* MULT DImode */
1688 {
1689 0, /* simple (N/A). */
1690 0, /* flag_setting (N/A). */
1691 COSTS_N_INSNS (2), /* extend. */
1692 0, /* add (N/A). */
1693 COSTS_N_INSNS (3), /* extend_add. */
1694 0 /* idiv (N/A). */
1695 }
1696 },
1697 /* LD/ST */
1698 {
1699 COSTS_N_INSNS (2), /* load. */
1700 0, /* load_sign_extend. */
1701 COSTS_N_INSNS (3), /* ldrd. */
1702 COSTS_N_INSNS (2), /* ldm_1st. */
1703 1, /* ldm_regs_per_insn_1st. */
1704 1, /* ldm_regs_per_insn_subsequent. */
1705 COSTS_N_INSNS (2), /* loadf. */
1706 COSTS_N_INSNS (3), /* loadd. */
1707 COSTS_N_INSNS (1), /* load_unaligned. */
1708 COSTS_N_INSNS (2), /* store. */
1709 COSTS_N_INSNS (3), /* strd. */
1710 COSTS_N_INSNS (2), /* stm_1st. */
1711 1, /* stm_regs_per_insn_1st. */
1712 1, /* stm_regs_per_insn_subsequent. */
1713 COSTS_N_INSNS (2), /* storef. */
1714 COSTS_N_INSNS (3), /* stored. */
1715 COSTS_N_INSNS (1), /* store_unaligned. */
1716 COSTS_N_INSNS (1), /* loadv. */
1717 COSTS_N_INSNS (1) /* storev. */
1718 },
1719 {
1720 /* FP SFmode */
1721 {
1722 COSTS_N_INSNS (7), /* div. */
1723 COSTS_N_INSNS (2), /* mult. */
1724 COSTS_N_INSNS (5), /* mult_addsub. */
1725 COSTS_N_INSNS (3), /* fma. */
1726 COSTS_N_INSNS (1), /* addsub. */
1727 0, /* fpconst. */
1728 0, /* neg. */
1729 0, /* compare. */
1730 0, /* widen. */
1731 0, /* narrow. */
1732 0, /* toint. */
1733 0, /* fromint. */
1734 0 /* roundint. */
1735 },
1736 /* FP DFmode */
1737 {
1738 COSTS_N_INSNS (15), /* div. */
1739 COSTS_N_INSNS (5), /* mult. */
1740 COSTS_N_INSNS (7), /* mult_addsub. */
1741 COSTS_N_INSNS (7), /* fma. */
1742 COSTS_N_INSNS (3), /* addsub. */
1743 0, /* fpconst. */
1744 0, /* neg. */
1745 0, /* compare. */
1746 0, /* widen. */
1747 0, /* narrow. */
1748 0, /* toint. */
1749 0, /* fromint. */
1750 0 /* roundint. */
1751 }
1752 },
1753 /* Vector */
1754 {
1755 COSTS_N_INSNS (1) /* alu. */
1756 }
1757 };
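/* For illustration (not part of the original table data): the entries in
   these extra-cost tables are expressed in the same units as COSTS_N_INSNS,
   i.e. multiples of the cost of one fast instruction (COSTS_N_INSNS (N)
   expands to N * 4 in GCC's cost units).  A zero entry means "no cost beyond
   the base instruction cost".  For example, v7m_extra_costs above prices a
   simple SImode multiply at an extra COSTS_N_INSNS (1), so when optimizing
   for speed the RTX cost hooks charge roughly the base instruction cost plus
   COSTS_N_INSNS (1) for it.  */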
1758
1759 const struct tune_params arm_slowmul_tune =
1760 {
1761 &generic_extra_costs, /* Insn extra costs. */
1762 NULL, /* Sched adj cost. */
1763 arm_default_branch_cost,
1764 &arm_default_vec_cost,
1765 3, /* Constant limit. */
1766 5, /* Max cond insns. */
1767 8, /* Memset max inline. */
1768 1, /* Issue rate. */
1769 ARM_PREFETCH_NOT_BENEFICIAL,
1770 tune_params::PREF_CONST_POOL_TRUE,
1771 tune_params::PREF_LDRD_FALSE,
1772 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1773 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1774 tune_params::DISPARAGE_FLAGS_NEITHER,
1775 tune_params::PREF_NEON_64_FALSE,
1776 tune_params::PREF_NEON_STRINGOPS_FALSE,
1777 tune_params::FUSE_NOTHING,
1778 tune_params::SCHED_AUTOPREF_OFF
1779 };
1780
1781 const struct tune_params arm_fastmul_tune =
1782 {
1783 &generic_extra_costs, /* Insn extra costs. */
1784 NULL, /* Sched adj cost. */
1785 arm_default_branch_cost,
1786 &arm_default_vec_cost,
1787 1, /* Constant limit. */
1788 5, /* Max cond insns. */
1789 8, /* Memset max inline. */
1790 1, /* Issue rate. */
1791 ARM_PREFETCH_NOT_BENEFICIAL,
1792 tune_params::PREF_CONST_POOL_TRUE,
1793 tune_params::PREF_LDRD_FALSE,
1794 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1796 tune_params::DISPARAGE_FLAGS_NEITHER,
1797 tune_params::PREF_NEON_64_FALSE,
1798 tune_params::PREF_NEON_STRINGOPS_FALSE,
1799 tune_params::FUSE_NOTHING,
1800 tune_params::SCHED_AUTOPREF_OFF
1801 };
1802
1803 /* StrongARM has early execution of branches, so a sequence that is worth
1804 skipping is shorter. Set max_insns_skipped to a lower value. */
1805
1806 const struct tune_params arm_strongarm_tune =
1807 {
1808 &generic_extra_costs, /* Insn extra costs. */
1809 NULL, /* Sched adj cost. */
1810 arm_default_branch_cost,
1811 &arm_default_vec_cost,
1812 1, /* Constant limit. */
1813 3, /* Max cond insns. */
1814 8, /* Memset max inline. */
1815 1, /* Issue rate. */
1816 ARM_PREFETCH_NOT_BENEFICIAL,
1817 tune_params::PREF_CONST_POOL_TRUE,
1818 tune_params::PREF_LDRD_FALSE,
1819 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1820 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1821 tune_params::DISPARAGE_FLAGS_NEITHER,
1822 tune_params::PREF_NEON_64_FALSE,
1823 tune_params::PREF_NEON_STRINGOPS_FALSE,
1824 tune_params::FUSE_NOTHING,
1825 tune_params::SCHED_AUTOPREF_OFF
1826 };
1827
1828 const struct tune_params arm_xscale_tune =
1829 {
1830 &generic_extra_costs, /* Insn extra costs. */
1831 xscale_sched_adjust_cost,
1832 arm_default_branch_cost,
1833 &arm_default_vec_cost,
1834 2, /* Constant limit. */
1835 3, /* Max cond insns. */
1836 8, /* Memset max inline. */
1837 1, /* Issue rate. */
1838 ARM_PREFETCH_NOT_BENEFICIAL,
1839 tune_params::PREF_CONST_POOL_TRUE,
1840 tune_params::PREF_LDRD_FALSE,
1841 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1842 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1843 tune_params::DISPARAGE_FLAGS_NEITHER,
1844 tune_params::PREF_NEON_64_FALSE,
1845 tune_params::PREF_NEON_STRINGOPS_FALSE,
1846 tune_params::FUSE_NOTHING,
1847 tune_params::SCHED_AUTOPREF_OFF
1848 };
1849
1850 const struct tune_params arm_9e_tune =
1851 {
1852 &generic_extra_costs, /* Insn extra costs. */
1853 NULL, /* Sched adj cost. */
1854 arm_default_branch_cost,
1855 &arm_default_vec_cost,
1856 1, /* Constant limit. */
1857 5, /* Max cond insns. */
1858 8, /* Memset max inline. */
1859 1, /* Issue rate. */
1860 ARM_PREFETCH_NOT_BENEFICIAL,
1861 tune_params::PREF_CONST_POOL_TRUE,
1862 tune_params::PREF_LDRD_FALSE,
1863 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1864 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1865 tune_params::DISPARAGE_FLAGS_NEITHER,
1866 tune_params::PREF_NEON_64_FALSE,
1867 tune_params::PREF_NEON_STRINGOPS_FALSE,
1868 tune_params::FUSE_NOTHING,
1869 tune_params::SCHED_AUTOPREF_OFF
1870 };
1871
1872 const struct tune_params arm_marvell_pj4_tune =
1873 {
1874 &generic_extra_costs, /* Insn extra costs. */
1875 NULL, /* Sched adj cost. */
1876 arm_default_branch_cost,
1877 &arm_default_vec_cost,
1878 1, /* Constant limit. */
1879 5, /* Max cond insns. */
1880 8, /* Memset max inline. */
1881 2, /* Issue rate. */
1882 ARM_PREFETCH_NOT_BENEFICIAL,
1883 tune_params::PREF_CONST_POOL_TRUE,
1884 tune_params::PREF_LDRD_FALSE,
1885 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1886 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1887 tune_params::DISPARAGE_FLAGS_NEITHER,
1888 tune_params::PREF_NEON_64_FALSE,
1889 tune_params::PREF_NEON_STRINGOPS_FALSE,
1890 tune_params::FUSE_NOTHING,
1891 tune_params::SCHED_AUTOPREF_OFF
1892 };
1893
1894 const struct tune_params arm_v6t2_tune =
1895 {
1896 &generic_extra_costs, /* Insn extra costs. */
1897 NULL, /* Sched adj cost. */
1898 arm_default_branch_cost,
1899 &arm_default_vec_cost,
1900 1, /* Constant limit. */
1901 5, /* Max cond insns. */
1902 8, /* Memset max inline. */
1903 1, /* Issue rate. */
1904 ARM_PREFETCH_NOT_BENEFICIAL,
1905 tune_params::PREF_CONST_POOL_FALSE,
1906 tune_params::PREF_LDRD_FALSE,
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1909 tune_params::DISPARAGE_FLAGS_NEITHER,
1910 tune_params::PREF_NEON_64_FALSE,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE,
1912 tune_params::FUSE_NOTHING,
1913 tune_params::SCHED_AUTOPREF_OFF
1914 };
1915
1916
1917 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1918 const struct tune_params arm_cortex_tune =
1919 {
1920 &generic_extra_costs,
1921 NULL, /* Sched adj cost. */
1922 arm_default_branch_cost,
1923 &arm_default_vec_cost,
1924 1, /* Constant limit. */
1925 5, /* Max cond insns. */
1926 8, /* Memset max inline. */
1927 2, /* Issue rate. */
1928 ARM_PREFETCH_NOT_BENEFICIAL,
1929 tune_params::PREF_CONST_POOL_FALSE,
1930 tune_params::PREF_LDRD_FALSE,
1931 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1932 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1933 tune_params::DISPARAGE_FLAGS_NEITHER,
1934 tune_params::PREF_NEON_64_FALSE,
1935 tune_params::PREF_NEON_STRINGOPS_FALSE,
1936 tune_params::FUSE_NOTHING,
1937 tune_params::SCHED_AUTOPREF_OFF
1938 };
1939
1940 const struct tune_params arm_cortex_a8_tune =
1941 {
1942 &cortexa8_extra_costs,
1943 NULL, /* Sched adj cost. */
1944 arm_default_branch_cost,
1945 &arm_default_vec_cost,
1946 1, /* Constant limit. */
1947 5, /* Max cond insns. */
1948 8, /* Memset max inline. */
1949 2, /* Issue rate. */
1950 ARM_PREFETCH_NOT_BENEFICIAL,
1951 tune_params::PREF_CONST_POOL_FALSE,
1952 tune_params::PREF_LDRD_FALSE,
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1954 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1955 tune_params::DISPARAGE_FLAGS_NEITHER,
1956 tune_params::PREF_NEON_64_FALSE,
1957 tune_params::PREF_NEON_STRINGOPS_TRUE,
1958 tune_params::FUSE_NOTHING,
1959 tune_params::SCHED_AUTOPREF_OFF
1960 };
1961
1962 const struct tune_params arm_cortex_a7_tune =
1963 {
1964 &cortexa7_extra_costs,
1965 NULL, /* Sched adj cost. */
1966 arm_default_branch_cost,
1967 &arm_default_vec_cost,
1968 1, /* Constant limit. */
1969 5, /* Max cond insns. */
1970 8, /* Memset max inline. */
1971 2, /* Issue rate. */
1972 ARM_PREFETCH_NOT_BENEFICIAL,
1973 tune_params::PREF_CONST_POOL_FALSE,
1974 tune_params::PREF_LDRD_FALSE,
1975 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1976 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1977 tune_params::DISPARAGE_FLAGS_NEITHER,
1978 tune_params::PREF_NEON_64_FALSE,
1979 tune_params::PREF_NEON_STRINGOPS_TRUE,
1980 tune_params::FUSE_NOTHING,
1981 tune_params::SCHED_AUTOPREF_OFF
1982 };
1983
1984 const struct tune_params arm_cortex_a15_tune =
1985 {
1986 &cortexa15_extra_costs,
1987 NULL, /* Sched adj cost. */
1988 arm_default_branch_cost,
1989 &arm_default_vec_cost,
1990 1, /* Constant limit. */
1991 2, /* Max cond insns. */
1992 8, /* Memset max inline. */
1993 3, /* Issue rate. */
1994 ARM_PREFETCH_NOT_BENEFICIAL,
1995 tune_params::PREF_CONST_POOL_FALSE,
1996 tune_params::PREF_LDRD_TRUE,
1997 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1998 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1999 tune_params::DISPARAGE_FLAGS_ALL,
2000 tune_params::PREF_NEON_64_FALSE,
2001 tune_params::PREF_NEON_STRINGOPS_TRUE,
2002 tune_params::FUSE_NOTHING,
2003 tune_params::SCHED_AUTOPREF_FULL
2004 };
2005
2006 const struct tune_params arm_cortex_a35_tune =
2007 {
2008 &cortexa53_extra_costs,
2009 NULL, /* Sched adj cost. */
2010 arm_default_branch_cost,
2011 &arm_default_vec_cost,
2012 1, /* Constant limit. */
2013 5, /* Max cond insns. */
2014 8, /* Memset max inline. */
2015 1, /* Issue rate. */
2016 ARM_PREFETCH_NOT_BENEFICIAL,
2017 tune_params::PREF_CONST_POOL_FALSE,
2018 tune_params::PREF_LDRD_FALSE,
2019 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2020 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2021 tune_params::DISPARAGE_FLAGS_NEITHER,
2022 tune_params::PREF_NEON_64_FALSE,
2023 tune_params::PREF_NEON_STRINGOPS_TRUE,
2024 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2025 tune_params::SCHED_AUTOPREF_OFF
2026 };
2027
2028 const struct tune_params arm_cortex_a53_tune =
2029 {
2030 &cortexa53_extra_costs,
2031 NULL, /* Sched adj cost. */
2032 arm_default_branch_cost,
2033 &arm_default_vec_cost,
2034 1, /* Constant limit. */
2035 5, /* Max cond insns. */
2036 8, /* Memset max inline. */
2037 2, /* Issue rate. */
2038 ARM_PREFETCH_NOT_BENEFICIAL,
2039 tune_params::PREF_CONST_POOL_FALSE,
2040 tune_params::PREF_LDRD_FALSE,
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2043 tune_params::DISPARAGE_FLAGS_NEITHER,
2044 tune_params::PREF_NEON_64_FALSE,
2045 tune_params::PREF_NEON_STRINGOPS_TRUE,
2046 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2047 tune_params::SCHED_AUTOPREF_OFF
2048 };
2049
2050 const struct tune_params arm_cortex_a57_tune =
2051 {
2052 &cortexa57_extra_costs,
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 2, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 3, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_TRUE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_ALL,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2069 tune_params::SCHED_AUTOPREF_FULL
2070 };
2071
2072 const struct tune_params arm_exynosm1_tune =
2073 {
2074 &exynosm1_extra_costs,
2075 NULL, /* Sched adj cost. */
2076 arm_default_branch_cost,
2077 &arm_default_vec_cost,
2078 1, /* Constant limit. */
2079 2, /* Max cond insns. */
2080 8, /* Memset max inline. */
2081 3, /* Issue rate. */
2082 ARM_PREFETCH_NOT_BENEFICIAL,
2083 tune_params::PREF_CONST_POOL_FALSE,
2084 tune_params::PREF_LDRD_TRUE,
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2087 tune_params::DISPARAGE_FLAGS_ALL,
2088 tune_params::PREF_NEON_64_FALSE,
2089 tune_params::PREF_NEON_STRINGOPS_TRUE,
2090 tune_params::FUSE_NOTHING,
2091 tune_params::SCHED_AUTOPREF_OFF
2092 };
2093
2094 const struct tune_params arm_xgene1_tune =
2095 {
2096 &xgene1_extra_costs,
2097 NULL, /* Sched adj cost. */
2098 arm_default_branch_cost,
2099 &arm_default_vec_cost,
2100 1, /* Constant limit. */
2101 2, /* Max cond insns. */
2102 32, /* Memset max inline. */
2103 4, /* Issue rate. */
2104 ARM_PREFETCH_NOT_BENEFICIAL,
2105 tune_params::PREF_CONST_POOL_FALSE,
2106 tune_params::PREF_LDRD_TRUE,
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2108 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2109 tune_params::DISPARAGE_FLAGS_ALL,
2110 tune_params::PREF_NEON_64_FALSE,
2111 tune_params::PREF_NEON_STRINGOPS_FALSE,
2112 tune_params::FUSE_NOTHING,
2113 tune_params::SCHED_AUTOPREF_OFF
2114 };
2115
2116 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2117 less appealing. Set max_insns_skipped to a low value. */
2118
2119 const struct tune_params arm_cortex_a5_tune =
2120 {
2121 &cortexa5_extra_costs,
2122 NULL, /* Sched adj cost. */
2123 arm_cortex_a5_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 1, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 2, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_FALSE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_NEITHER,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_cortex_a9_tune =
2142 {
2143 &cortexa9_extra_costs,
2144 cortex_a9_sched_adjust_cost,
2145 arm_default_branch_cost,
2146 &arm_default_vec_cost,
2147 1, /* Constant limit. */
2148 5, /* Max cond insns. */
2149 8, /* Memset max inline. */
2150 2, /* Issue rate. */
2151 ARM_PREFETCH_BENEFICIAL(4,32,32),
2152 tune_params::PREF_CONST_POOL_FALSE,
2153 tune_params::PREF_LDRD_FALSE,
2154 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2156 tune_params::DISPARAGE_FLAGS_NEITHER,
2157 tune_params::PREF_NEON_64_FALSE,
2158 tune_params::PREF_NEON_STRINGOPS_FALSE,
2159 tune_params::FUSE_NOTHING,
2160 tune_params::SCHED_AUTOPREF_OFF
2161 };
2162
2163 const struct tune_params arm_cortex_a12_tune =
2164 {
2165 &cortexa12_extra_costs,
2166 NULL, /* Sched adj cost. */
2167 arm_default_branch_cost,
2168 &arm_default_vec_cost, /* Vectorizer costs. */
2169 1, /* Constant limit. */
2170 2, /* Max cond insns. */
2171 8, /* Memset max inline. */
2172 2, /* Issue rate. */
2173 ARM_PREFETCH_NOT_BENEFICIAL,
2174 tune_params::PREF_CONST_POOL_FALSE,
2175 tune_params::PREF_LDRD_TRUE,
2176 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2177 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2178 tune_params::DISPARAGE_FLAGS_ALL,
2179 tune_params::PREF_NEON_64_FALSE,
2180 tune_params::PREF_NEON_STRINGOPS_TRUE,
2181 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2182 tune_params::SCHED_AUTOPREF_OFF
2183 };
2184
2185 const struct tune_params arm_cortex_a73_tune =
2186 {
2187 &cortexa57_extra_costs,
2188 NULL, /* Sched adj cost. */
2189 arm_default_branch_cost,
2190 &arm_default_vec_cost, /* Vectorizer costs. */
2191 1, /* Constant limit. */
2192 2, /* Max cond insns. */
2193 8, /* Memset max inline. */
2194 2, /* Issue rate. */
2195 ARM_PREFETCH_NOT_BENEFICIAL,
2196 tune_params::PREF_CONST_POOL_FALSE,
2197 tune_params::PREF_LDRD_TRUE,
2198 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2199 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2200 tune_params::DISPARAGE_FLAGS_ALL,
2201 tune_params::PREF_NEON_64_FALSE,
2202 tune_params::PREF_NEON_STRINGOPS_TRUE,
2203 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2204 tune_params::SCHED_AUTOPREF_FULL
2205 };
2206
2207 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2208 cycle to execute each. An LDR from the constant pool also takes two cycles
2209 to execute, but mildly increases pipelining opportunity (consecutive
2210 loads/stores can be pipelined together, saving one cycle), and may also
2211 improve icache utilisation. Hence we prefer the constant pool for such
2212 processors. */
2213
2214 const struct tune_params arm_v7m_tune =
2215 {
2216 &v7m_extra_costs,
2217 NULL, /* Sched adj cost. */
2218 arm_cortex_m_branch_cost,
2219 &arm_default_vec_cost,
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 1, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_TRUE,
2226 tune_params::PREF_LDRD_FALSE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_NEITHER,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_FALSE,
2232 tune_params::FUSE_NOTHING,
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
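/* For illustration of the constant-pool preference above (an editorial
   sketch, not generated output): for a 32-bit constant such as 0x12345678
   this tuning biases the compiler towards a single literal load,

       ldr     r0, .LC0        @ load from the literal pool

   rather than the equivalent two-instruction pair

       movw    r0, #0x5678
       movt    r0, #0x1234

   which PREF_CONST_POOL_FALSE tunings would normally favour.  */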
2235
2236 /* Cortex-M7 tuning. */
2237
2238 const struct tune_params arm_cortex_m7_tune =
2239 {
2240 &v7m_extra_costs,
2241 NULL, /* Sched adj cost. */
2242 arm_cortex_m7_branch_cost,
2243 &arm_default_vec_cost,
2244 0, /* Constant limit. */
2245 1, /* Max cond insns. */
2246 8, /* Memset max inline. */
2247 2, /* Issue rate. */
2248 ARM_PREFETCH_NOT_BENEFICIAL,
2249 tune_params::PREF_CONST_POOL_TRUE,
2250 tune_params::PREF_LDRD_FALSE,
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2252 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2253 tune_params::DISPARAGE_FLAGS_NEITHER,
2254 tune_params::PREF_NEON_64_FALSE,
2255 tune_params::PREF_NEON_STRINGOPS_FALSE,
2256 tune_params::FUSE_NOTHING,
2257 tune_params::SCHED_AUTOPREF_OFF
2258 };
2259
2260 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2261 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2262 cortex-m23. */
2263 const struct tune_params arm_v6m_tune =
2264 {
2265 &generic_extra_costs, /* Insn extra costs. */
2266 NULL, /* Sched adj cost. */
2267 arm_default_branch_cost,
2268 &arm_default_vec_cost, /* Vectorizer costs. */
2269 1, /* Constant limit. */
2270 5, /* Max cond insns. */
2271 8, /* Memset max inline. */
2272 1, /* Issue rate. */
2273 ARM_PREFETCH_NOT_BENEFICIAL,
2274 tune_params::PREF_CONST_POOL_FALSE,
2275 tune_params::PREF_LDRD_FALSE,
2276 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2277 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2278 tune_params::DISPARAGE_FLAGS_NEITHER,
2279 tune_params::PREF_NEON_64_FALSE,
2280 tune_params::PREF_NEON_STRINGOPS_FALSE,
2281 tune_params::FUSE_NOTHING,
2282 tune_params::SCHED_AUTOPREF_OFF
2283 };
2284
2285 const struct tune_params arm_fa726te_tune =
2286 {
2287 &generic_extra_costs, /* Insn extra costs. */
2288 fa726te_sched_adjust_cost,
2289 arm_default_branch_cost,
2290 &arm_default_vec_cost,
2291 1, /* Constant limit. */
2292 5, /* Max cond insns. */
2293 8, /* Memset max inline. */
2294 2, /* Issue rate. */
2295 ARM_PREFETCH_NOT_BENEFICIAL,
2296 tune_params::PREF_CONST_POOL_TRUE,
2297 tune_params::PREF_LDRD_FALSE,
2298 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2299 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2300 tune_params::DISPARAGE_FLAGS_NEITHER,
2301 tune_params::PREF_NEON_64_FALSE,
2302 tune_params::PREF_NEON_STRINGOPS_FALSE,
2303 tune_params::FUSE_NOTHING,
2304 tune_params::SCHED_AUTOPREF_OFF
2305 };
2306
2307 /* Auto-generated CPU, FPU and architecture tables. */
2308 #include "arm-cpu-data.h"
2309
2310 /* The name of the preprocessor macro to define for this architecture. PROFILE
2311 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2312 is thus chosen to be big enough to hold the longest architecture name. */
2313
2314 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2315
2316 /* Supported TLS relocations. */
2317
2318 enum tls_reloc {
2319 TLS_GD32,
2320 TLS_LDM32,
2321 TLS_LDO32,
2322 TLS_IE32,
2323 TLS_LE32,
2324 TLS_DESCSEQ /* GNU scheme */
2325 };
2326
2327 /* The maximum number of insns to be used when loading a constant. */
2328 inline static int
2329 arm_constant_limit (bool size_p)
2330 {
2331 return size_p ? 1 : current_tune->constant_limit;
2332 }
2333
2334 /* Emit an insn that's a simple single-set. Both the operands must be known
2335 to be valid. */
2336 inline static rtx_insn *
2337 emit_set_insn (rtx x, rtx y)
2338 {
2339 return emit_insn (gen_rtx_SET (x, y));
2340 }
2341
2342 /* Return the number of bits set in VALUE. */
2343 static unsigned
2344 bit_count (unsigned long value)
2345 {
2346 unsigned long count = 0;
2347
2348 while (value)
2349 {
2350 count++;
2351 value &= value - 1; /* Clear the least-significant set bit. */
2352 }
2353
2354 return count;
2355 }
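/* Worked example for the loop above: with VALUE == 0b01101100 each iteration
   clears the lowest set bit,

       0b01101100 -> 0b01101000 -> 0b01100000 -> 0b01000000 -> 0,

   so bit_count returns 4 after exactly one iteration per set bit
   (Kernighan's trick).  */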
2356
2357 /* Return the number of bits set in BMAP. */
2358 static unsigned
2359 bitmap_popcount (const sbitmap bmap)
2360 {
2361 unsigned int count = 0;
2362 unsigned int n = 0;
2363 sbitmap_iterator sbi;
2364
2365 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2366 count++;
2367 return count;
2368 }
2369
2370 typedef struct
2371 {
2372 machine_mode mode;
2373 const char *name;
2374 } arm_fixed_mode_set;
2375
2376 /* A small helper for setting fixed-point libfuncs. */
2377
2378 static void
2379 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2380 const char *funcname, const char *modename,
2381 int num_suffix)
2382 {
2383 char buffer[50];
2384
2385 if (num_suffix == 0)
2386 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2387 else
2388 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2389
2390 set_optab_libfunc (optable, mode, buffer);
2391 }
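/* For illustration, the fixed-point setup loop further down makes calls such
   as

     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);
     arm_set_fixed_optab_libfunc (neg_optab, E_SQmode, "neg", "sq", 2);

   which register "__gnu_addsq3" and "__gnu_negsq2" respectively; passing a
   NUM_SUFFIX of 0 would omit the trailing operand-count digit entirely.  */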
2392
2393 static void
2394 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2395 machine_mode from, const char *funcname,
2396 const char *toname, const char *fromname)
2397 {
2398 char buffer[50];
2399 const char *maybe_suffix_2 = "";
2400
2401 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2402 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2403 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2404 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2405 maybe_suffix_2 = "2";
2406
2407 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2408 maybe_suffix_2);
2409
2410 set_conv_libfunc (optable, to, from, buffer);
2411 }
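/* For illustration, a conversion between two signed fract modes, e.g.

     arm_set_fixed_conv_libfunc (fract_optab, E_DQmode, E_SQmode,
                                 "fract", "dq", "sq");

   satisfies the test above (both fixed-point, same signedness, both fract)
   and so registers "__gnu_fractsqdq2", whereas a conversion involving an
   integer mode such as E_SImode takes no "2" suffix (e.g. "__gnu_fractsisq").  */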
2412
2413 /* Set up library functions unique to ARM. */
2414
2415 static void
2416 arm_init_libfuncs (void)
2417 {
2418 /* For Linux, we have access to kernel support for atomic operations. */
2419 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2420 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2421
2422 /* There are no special library functions unless we are using the
2423 ARM BPABI. */
2424 if (!TARGET_BPABI)
2425 return;
2426
2427 /* The functions below are described in Section 4 of the "Run-Time
2428 ABI for the ARM architecture", Version 1.0. */
2429
2430 /* Double-precision floating-point arithmetic. Table 2. */
2431 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2432 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2433 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2434 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2435 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2436
2437 /* Double-precision comparisons. Table 3. */
2438 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2439 set_optab_libfunc (ne_optab, DFmode, NULL);
2440 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2441 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2442 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2443 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2444 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2445
2446 /* Single-precision floating-point arithmetic. Table 4. */
2447 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2448 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2449 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2450 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2451 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2452
2453 /* Single-precision comparisons. Table 5. */
2454 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2455 set_optab_libfunc (ne_optab, SFmode, NULL);
2456 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2457 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2458 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2459 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2460 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2461
2462 /* Floating-point to integer conversions. Table 6. */
2463 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2464 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2465 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2466 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2467 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2468 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2469 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2470 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2471
2472 /* Conversions between floating types. Table 7. */
2473 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2474 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2475
2476 /* Integer to floating-point conversions. Table 8. */
2477 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2478 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2479 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2480 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2481 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2482 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2483 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2484 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2485
2486 /* Long long. Table 9. */
2487 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2488 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2489 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2490 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2491 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2492 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2493 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2494 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2495
2496 /* Integer (32/32->32) division. \S 4.3.1. */
2497 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2498 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2499
2500 /* The divmod functions are designed so that they can be used for
2501 plain division, even though they return both the quotient and the
2502 remainder. The quotient is returned in the usual location (i.e.,
2503 r0 for SImode, {r0, r1} for DImode), just as would be expected
2504 for an ordinary division routine. Because the AAPCS calling
2505 conventions specify that all of { r0, r1, r2, r3 } are
2506 call-clobbered registers, there is no need to tell the compiler
2507 explicitly that those registers are clobbered by these
2508 routines. */
2509 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2510 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
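  /* For illustration: given

	 long long quot = num / den;

     the compiler therefore emits a call to __aeabi_ldivmod, uses the
     quotient returned in { r0, r1 } and simply ignores the remainder the
     routine also leaves in { r2, r3 }.  */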
2511
2512 /* For SImode division the ABI provides div-without-mod routines,
2513 which are faster. */
2514 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2515 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2516
2517 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2518 divmod libcalls instead. */
2519 set_optab_libfunc (smod_optab, DImode, NULL);
2520 set_optab_libfunc (umod_optab, DImode, NULL);
2521 set_optab_libfunc (smod_optab, SImode, NULL);
2522 set_optab_libfunc (umod_optab, SImode, NULL);
2523
2524 /* Half-precision float operations. The compiler handles all operations
2525 with NULL libfuncs by converting to SFmode. */
2526 switch (arm_fp16_format)
2527 {
2528 case ARM_FP16_FORMAT_IEEE:
2529 case ARM_FP16_FORMAT_ALTERNATIVE:
2530
2531 /* Conversions. */
2532 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2533 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2534 ? "__gnu_f2h_ieee"
2535 : "__gnu_f2h_alternative"));
2536 set_conv_libfunc (sext_optab, SFmode, HFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_h2f_ieee"
2539 : "__gnu_h2f_alternative"));
2540
2541 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2542 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2543 ? "__gnu_d2h_ieee"
2544 : "__gnu_d2h_alternative"));
2545
2546 /* Arithmetic. */
2547 set_optab_libfunc (add_optab, HFmode, NULL);
2548 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2549 set_optab_libfunc (smul_optab, HFmode, NULL);
2550 set_optab_libfunc (neg_optab, HFmode, NULL);
2551 set_optab_libfunc (sub_optab, HFmode, NULL);
2552
2553 /* Comparisons. */
2554 set_optab_libfunc (eq_optab, HFmode, NULL);
2555 set_optab_libfunc (ne_optab, HFmode, NULL);
2556 set_optab_libfunc (lt_optab, HFmode, NULL);
2557 set_optab_libfunc (le_optab, HFmode, NULL);
2558 set_optab_libfunc (ge_optab, HFmode, NULL);
2559 set_optab_libfunc (gt_optab, HFmode, NULL);
2560 set_optab_libfunc (unord_optab, HFmode, NULL);
2561 break;
2562
2563 default:
2564 break;
2565 }
2566
2567 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2568 {
2569 const arm_fixed_mode_set fixed_arith_modes[] =
2570 {
2571 { E_QQmode, "qq" },
2572 { E_UQQmode, "uqq" },
2573 { E_HQmode, "hq" },
2574 { E_UHQmode, "uhq" },
2575 { E_SQmode, "sq" },
2576 { E_USQmode, "usq" },
2577 { E_DQmode, "dq" },
2578 { E_UDQmode, "udq" },
2579 { E_TQmode, "tq" },
2580 { E_UTQmode, "utq" },
2581 { E_HAmode, "ha" },
2582 { E_UHAmode, "uha" },
2583 { E_SAmode, "sa" },
2584 { E_USAmode, "usa" },
2585 { E_DAmode, "da" },
2586 { E_UDAmode, "uda" },
2587 { E_TAmode, "ta" },
2588 { E_UTAmode, "uta" }
2589 };
2590 const arm_fixed_mode_set fixed_conv_modes[] =
2591 {
2592 { E_QQmode, "qq" },
2593 { E_UQQmode, "uqq" },
2594 { E_HQmode, "hq" },
2595 { E_UHQmode, "uhq" },
2596 { E_SQmode, "sq" },
2597 { E_USQmode, "usq" },
2598 { E_DQmode, "dq" },
2599 { E_UDQmode, "udq" },
2600 { E_TQmode, "tq" },
2601 { E_UTQmode, "utq" },
2602 { E_HAmode, "ha" },
2603 { E_UHAmode, "uha" },
2604 { E_SAmode, "sa" },
2605 { E_USAmode, "usa" },
2606 { E_DAmode, "da" },
2607 { E_UDAmode, "uda" },
2608 { E_TAmode, "ta" },
2609 { E_UTAmode, "uta" },
2610 { E_QImode, "qi" },
2611 { E_HImode, "hi" },
2612 { E_SImode, "si" },
2613 { E_DImode, "di" },
2614 { E_TImode, "ti" },
2615 { E_SFmode, "sf" },
2616 { E_DFmode, "df" }
2617 };
2618 unsigned int i, j;
2619
2620 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2621 {
2622 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2623 "add", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2625 "ssadd", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2627 "usadd", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2629 "sub", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2631 "sssub", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2633 "ussub", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2635 "mul", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2637 "ssmul", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2639 "usmul", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2641 "div", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2643 "udiv", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2645 "ssdiv", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2647 "usdiv", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2649 "neg", fixed_arith_modes[i].name, 2);
2650 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2651 "ssneg", fixed_arith_modes[i].name, 2);
2652 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2653 "usneg", fixed_arith_modes[i].name, 2);
2654 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2655 "ashl", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2657 "ashr", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2659 "lshr", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2661 "ssashl", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2663 "usashl", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2665 "cmp", fixed_arith_modes[i].name, 2);
2666 }
2667
2668 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2669 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2670 {
2671 if (i == j
2672 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2673 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2674 continue;
2675
2676 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "fract",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2680 arm_set_fixed_conv_libfunc (satfract_optab,
2681 fixed_conv_modes[i].mode,
2682 fixed_conv_modes[j].mode, "satfract",
2683 fixed_conv_modes[i].name,
2684 fixed_conv_modes[j].name);
2685 arm_set_fixed_conv_libfunc (fractuns_optab,
2686 fixed_conv_modes[i].mode,
2687 fixed_conv_modes[j].mode, "fractuns",
2688 fixed_conv_modes[i].name,
2689 fixed_conv_modes[j].name);
2690 arm_set_fixed_conv_libfunc (satfractuns_optab,
2691 fixed_conv_modes[i].mode,
2692 fixed_conv_modes[j].mode, "satfractuns",
2693 fixed_conv_modes[i].name,
2694 fixed_conv_modes[j].name);
2695 }
2696 }
2697
2698 if (TARGET_AAPCS_BASED)
2699 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2700 }
2701
2702 /* On AAPCS systems, this is the "struct __va_list". */
2703 static GTY(()) tree va_list_type;
2704
2705 /* Return the type to use as __builtin_va_list. */
2706 static tree
2707 arm_build_builtin_va_list (void)
2708 {
2709 tree va_list_name;
2710 tree ap_field;
2711
2712 if (!TARGET_AAPCS_BASED)
2713 return std_build_builtin_va_list ();
2714
2715 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2716 defined as:
2717
2718 struct __va_list
2719 {
2720 void *__ap;
2721 };
2722
2723 The C Library ABI further reinforces this definition in \S
2724 4.1.
2725
2726 We must follow this definition exactly. The structure tag
2727 name is visible in C++ mangled names, and thus forms a part
2728 of the ABI. The field name may be used by people who
2729 #include <stdarg.h>. */
2730 /* Create the type. */
2731 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2732 /* Give it the required name. */
2733 va_list_name = build_decl (BUILTINS_LOCATION,
2734 TYPE_DECL,
2735 get_identifier ("__va_list"),
2736 va_list_type);
2737 DECL_ARTIFICIAL (va_list_name) = 1;
2738 TYPE_NAME (va_list_type) = va_list_name;
2739 TYPE_STUB_DECL (va_list_type) = va_list_name;
2740 /* Create the __ap field. */
2741 ap_field = build_decl (BUILTINS_LOCATION,
2742 FIELD_DECL,
2743 get_identifier ("__ap"),
2744 ptr_type_node);
2745 DECL_ARTIFICIAL (ap_field) = 1;
2746 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2747 TYPE_FIELDS (va_list_type) = ap_field;
2748 /* Compute its layout. */
2749 layout_type (va_list_type);
2750
2751 return va_list_type;
2752 }
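/* For illustration of why the tag matters: under the ARM C++ ABI va_list is
   treated as std::__va_list, so a declaration such as

     void f (va_list ap);

   is expected to mangle as _Z1fSt9__va_list.  An arbitrary internal tag name
   would change that mangling and hence break cross-compiler link
   compatibility.  */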
2753
2754 /* Return an expression of type "void *" pointing to the next
2755 available argument in a variable-argument list. VALIST is the
2756 user-level va_list object, of type __builtin_va_list. */
2757 static tree
2758 arm_extract_valist_ptr (tree valist)
2759 {
2760 if (TREE_TYPE (valist) == error_mark_node)
2761 return error_mark_node;
2762
2763 /* On an AAPCS target, the pointer is stored within "struct
2764 va_list". */
2765 if (TARGET_AAPCS_BASED)
2766 {
2767 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2768 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2769 valist, ap_field, NULL_TREE);
2770 }
2771
2772 return valist;
2773 }
2774
2775 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2776 static void
2777 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2778 {
2779 valist = arm_extract_valist_ptr (valist);
2780 std_expand_builtin_va_start (valist, nextarg);
2781 }
2782
2783 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2784 static tree
2785 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2786 gimple_seq *post_p)
2787 {
2788 valist = arm_extract_valist_ptr (valist);
2789 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2790 }
2791
2792 /* Check any incompatible options that the user has specified. */
2793 static void
2794 arm_option_check_internal (struct gcc_options *opts)
2795 {
2796 int flags = opts->x_target_flags;
2797
2798 /* iWMMXt and NEON are incompatible. */
2799 if (TARGET_IWMMXT
2800 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2801 error ("iWMMXt and NEON are incompatible");
2802
2803 /* Make sure that the processor choice does not conflict with any of the
2804 other command line choices. */
2805 if (TARGET_ARM_P (flags)
2806 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2807 error ("target CPU does not support ARM mode");
2808
2809 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2810 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2811 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2812
2813 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2814 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2815
2816 /* If this target is normally configured to use APCS frames, warn if they
2817 are turned off and debugging is turned on. */
2818 if (TARGET_ARM_P (flags)
2819 && write_symbols != NO_DEBUG
2820 && !TARGET_APCS_FRAME
2821 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2822 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2823
2824 /* iWMMXt unsupported under Thumb mode. */
2825 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2826 error ("iWMMXt unsupported under Thumb mode");
2827
2828 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2829 error ("can not use -mtp=cp15 with 16-bit Thumb");
2830
2831 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2832 {
2833 error ("RTP PIC is incompatible with Thumb");
2834 flag_pic = 0;
2835 }
2836
2837 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2838 with MOVT. */
2839 if ((target_pure_code || target_slow_flash_data)
2840 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2841 {
2842 const char *flag = (target_pure_code ? "-mpure-code" :
2843 "-mslow-flash-data");
2844 error ("%s only supports non-pic code on M-profile targets with the "
2845 "MOVT instruction", flag);
2846 }
2847
2848 }
2849
2850 /* Recompute the global settings depending on target attribute options. */
2851
2852 static void
2853 arm_option_params_internal (void)
2854 {
2855 /* If we are not using the default (ARM mode) section anchor offset
2856 ranges, then set the correct ranges now. */
2857 if (TARGET_THUMB1)
2858 {
2859 /* Thumb-1 LDR instructions cannot have negative offsets.
2860 Permissible positive offset ranges are 5-bit (for byte loads),
2861 6-bit (for halfword loads), or 7-bit (for word loads).
2862 Empirical results suggest a 7-bit anchor range gives the best
2863 overall code size. */
2864 targetm.min_anchor_offset = 0;
2865 targetm.max_anchor_offset = 127;
2866 }
2867 else if (TARGET_THUMB2)
2868 {
2869 /* The minimum is set such that the total size of the block
2870 for a particular anchor is 248 + 1 + 4095 bytes, which is
2871 divisible by eight, ensuring natural spacing of anchors. */
2872 targetm.min_anchor_offset = -248;
2873 targetm.max_anchor_offset = 4095;
2874 }
2875 else
2876 {
2877 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2878 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2879 }
2880
2881 /* Increase the number of conditional instructions with -Os. */
2882 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2883
2884 /* For THUMB2, we limit the conditional sequence to one IT block. */
2885 if (TARGET_THUMB2)
2886 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2887 }
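/* For illustration of the Thumb-2 numbers above: an anchor placed at address
   A can reach objects in [A - 248, A + 4095], a block of
   248 + 1 + 4095 = 4344 bytes, which is divisible by eight (4344 = 8 * 543),
   giving the natural anchor spacing the comment describes.  */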
2888
2889 /* True if -mflip-thumb should next add an attribute for the default
2890 mode, false if it should next add an attribute for the opposite mode. */
2891 static GTY(()) bool thumb_flipper;
2892
2893 /* Options after initial target override. */
2894 static GTY(()) tree init_optimize;
2895
2896 static void
2897 arm_override_options_after_change_1 (struct gcc_options *opts)
2898 {
2899 if (opts->x_align_functions <= 0)
2900 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2901 && opts->x_optimize_size ? 2 : 4;
2902 }
2903
2904 /* Implement targetm.override_options_after_change. */
2905
2906 static void
2907 arm_override_options_after_change (void)
2908 {
2909 arm_configure_build_target (&arm_active_target,
2910 TREE_TARGET_OPTION (target_option_default_node),
2911 &global_options_set, false);
2912
2913 arm_override_options_after_change_1 (&global_options);
2914 }
2915
2916 /* Implement TARGET_OPTION_SAVE. */
2917 static void
2918 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2919 {
2920 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2921 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2922 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2923 }
2924
2925 /* Implement TARGET_OPTION_RESTORE. */
2926 static void
2927 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2928 {
2929 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2930 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2931 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2932 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2933 false);
2934 }
2935
2936 /* Reset options between modes that the user has specified. */
2937 static void
2938 arm_option_override_internal (struct gcc_options *opts,
2939 struct gcc_options *opts_set)
2940 {
2941 arm_override_options_after_change_1 (opts);
2942
2943 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2944 {
2945 /* The default is to enable interworking, so this warning message would
2946 be confusing to users who have just compiled with, eg, -march=armv3. */
2947 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2948 opts->x_target_flags &= ~MASK_INTERWORK;
2949 }
2950
2951 if (TARGET_THUMB_P (opts->x_target_flags)
2952 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2953 {
2954 warning (0, "target CPU does not support THUMB instructions");
2955 opts->x_target_flags &= ~MASK_THUMB;
2956 }
2957
2958 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2959 {
2960 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2961 opts->x_target_flags &= ~MASK_APCS_FRAME;
2962 }
2963
2964 /* Callee super interworking implies thumb interworking. Adding
2965 this to the flags here simplifies the logic elsewhere. */
2966 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2967 opts->x_target_flags |= MASK_INTERWORK;
2968
2969 /* We need to remember the initial values so that combinations of options
2970 like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2971 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2972
2973 if (! opts_set->x_arm_restrict_it)
2974 opts->x_arm_restrict_it = arm_arch8;
2975
2976 /* ARM execution state and M profile don't have [restrict] IT. */
2977 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2978 opts->x_arm_restrict_it = 0;
2979
2980 /* Enable -munaligned-access by default for
2981 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
2982 i.e. Thumb2 and ARM state only.
2983 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2984 - ARMv8 architecture-based processors.
2985
2986 Disable -munaligned-access by default for
2987 - all pre-ARMv6 architecture-based processors
2988 - ARMv6-M architecture-based processors
2989 - ARMv8-M Baseline processors. */
2990
2991 if (! opts_set->x_unaligned_access)
2992 {
2993 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2994 && arm_arch6 && (arm_arch_notm || arm_arch7));
2995 }
2996 else if (opts->x_unaligned_access == 1
2997 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2998 {
2999 warning (0, "target CPU does not support unaligned accesses");
3000 opts->x_unaligned_access = 0;
3001 }
3002
3003 /* Don't warn since it's on by default in -O2. */
3004 if (TARGET_THUMB1_P (opts->x_target_flags))
3005 opts->x_flag_schedule_insns = 0;
3006 else
3007 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3008
3009 /* Disable shrink-wrap when optimizing function for size, since it tends to
3010 generate additional returns. */
3011 if (optimize_function_for_size_p (cfun)
3012 && TARGET_THUMB2_P (opts->x_target_flags))
3013 opts->x_flag_shrink_wrap = false;
3014 else
3015 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3016
3017 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3018 - epilogue_insns - does not accurately model the corresponding insns
3019 emitted in the asm file. In particular, see the comment in thumb_exit
3020 'Find out how many of the (return) argument registers we can corrupt'.
3021 As a consequence, the epilogue may clobber registers without fipa-ra
3022 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3023 TODO: Accurately model clobbers for epilogue_insns and reenable
3024 fipa-ra. */
3025 if (TARGET_THUMB1_P (opts->x_target_flags))
3026 opts->x_flag_ipa_ra = 0;
3027 else
3028 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3029
3030 /* Thumb2 inline assembly code should always use unified syntax.
3031 This will apply to ARM and Thumb1 eventually. */
3032 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3033
3034 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3035 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3036 #endif
3037 }
3038
3039 static sbitmap isa_all_fpubits;
3040 static sbitmap isa_quirkbits;
3041
3042 /* Configure a build target TARGET from the user-specified options OPTS and
3043 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3044 architecture have been specified, but the two are not identical. */
3045 void
3046 arm_configure_build_target (struct arm_build_target *target,
3047 struct cl_target_option *opts,
3048 struct gcc_options *opts_set,
3049 bool warn_compatible)
3050 {
3051 const cpu_option *arm_selected_tune = NULL;
3052 const arch_option *arm_selected_arch = NULL;
3053 const cpu_option *arm_selected_cpu = NULL;
3054 const arm_fpu_desc *arm_selected_fpu = NULL;
3055 const char *tune_opts = NULL;
3056 const char *arch_opts = NULL;
3057 const char *cpu_opts = NULL;
3058
3059 bitmap_clear (target->isa);
3060 target->core_name = NULL;
3061 target->arch_name = NULL;
3062
3063 if (opts_set->x_arm_arch_string)
3064 {
3065 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3066 "-march",
3067 opts->x_arm_arch_string);
3068 arch_opts = strchr (opts->x_arm_arch_string, '+');
3069 }
3070
3071 if (opts_set->x_arm_cpu_string)
3072 {
3073 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3074 opts->x_arm_cpu_string);
3075 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3076 arm_selected_tune = arm_selected_cpu;
3077 /* If taking the tuning from -mcpu, we don't need to rescan the
3078 options for tuning. */
3079 }
3080
3081 if (opts_set->x_arm_tune_string)
3082 {
3083 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3084 opts->x_arm_tune_string);
3085 tune_opts = strchr (opts->x_arm_tune_string, '+');
3086 }
3087
3088 if (arm_selected_arch)
3089 {
3090 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3091 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3092 arch_opts);
3093
3094 if (arm_selected_cpu)
3095 {
3096 auto_sbitmap cpu_isa (isa_num_bits);
3097 auto_sbitmap isa_delta (isa_num_bits);
3098
3099 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3100 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3101 cpu_opts);
3102 bitmap_xor (isa_delta, cpu_isa, target->isa);
3103 /* Ignore any bits that are quirk bits. */
3104 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3105 /* Ignore (for now) any bits that might be set by -mfpu. */
3106 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3107
3108 if (!bitmap_empty_p (isa_delta))
3109 {
3110 if (warn_compatible)
3111 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3112 arm_selected_cpu->common.name,
3113 arm_selected_arch->common.name);
3114 /* -march wins for code generation.
3115 -mcpu wins for default tuning. */
3116 if (!arm_selected_tune)
3117 arm_selected_tune = arm_selected_cpu;
3118
3119 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3120 target->arch_name = arm_selected_arch->common.name;
3121 }
3122 else
3123 {
3124 /* Architecture and CPU are essentially the same.
3125 Prefer the CPU setting. */
3126 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3127 target->core_name = arm_selected_cpu->common.name;
3128 /* Copy the CPU's capabilities, so that we inherit the
3129 appropriate extensions and quirks. */
3130 bitmap_copy (target->isa, cpu_isa);
3131 }
3132 }
3133 else
3134 {
3135 /* Pick a CPU based on the architecture. */
3136 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3137 target->arch_name = arm_selected_arch->common.name;
3138 /* Note: target->core_name is left unset in this path. */
3139 }
3140 }
3141 else if (arm_selected_cpu)
3142 {
3143 target->core_name = arm_selected_cpu->common.name;
3144 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3145 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3146 cpu_opts);
3147 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3148 }
3149 /* If the user did not specify a processor or architecture, choose
3150 one for them. */
3151 else
3152 {
3153 const cpu_option *sel;
3154 auto_sbitmap sought_isa (isa_num_bits);
3155 bitmap_clear (sought_isa);
3156 auto_sbitmap default_isa (isa_num_bits);
3157
3158 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3159 TARGET_CPU_DEFAULT);
3160 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3161 gcc_assert (arm_selected_cpu->common.name);
3162
3163 /* RWE: All of the selection logic below (to the end of this
3164 'if' clause) looks somewhat suspect. It appears to be mostly
3165 there to support forcing thumb support when the default CPU
3166 does not have thumb (somewhat dubious in terms of what the
3167 user might be expecting). I think it should be removed once
3168 support for the pre-thumb era cores is removed. */
3169 sel = arm_selected_cpu;
3170 arm_initialize_isa (default_isa, sel->common.isa_bits);
3171 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3172 cpu_opts);
3173
3174 /* Now check to see if the user has specified any command line
3175 switches that require certain abilities from the cpu. */
3176
3177 if (TARGET_INTERWORK || TARGET_THUMB)
3178 {
3179 bitmap_set_bit (sought_isa, isa_bit_thumb);
3180 bitmap_set_bit (sought_isa, isa_bit_mode32);
3181
3182 /* There are no ARM processors that support both APCS-26 and
3183 interworking. Therefore we forcibly remove MODE26 from
3184 the isa features here (if it was set), so that the
3185 search below will always be able to find a compatible
3186 processor. */
3187 bitmap_clear_bit (default_isa, isa_bit_mode26);
3188 }
3189
3190 /* If there are such requirements and the default CPU does not
3191 satisfy them, we need to run over the complete list of
3192 cores looking for one that is satisfactory. */
3193 if (!bitmap_empty_p (sought_isa)
3194 && !bitmap_subset_p (sought_isa, default_isa))
3195 {
3196 auto_sbitmap candidate_isa (isa_num_bits);
3197 /* We're only interested in a CPU with at least the
3198 capabilities of the default CPU and the required
3199 additional features. */
3200 bitmap_ior (default_isa, default_isa, sought_isa);
3201
3202 /* Try to locate a CPU type that supports all of the abilities
3203 of the default CPU, plus the extra abilities requested by
3204 the user. */
3205 for (sel = all_cores; sel->common.name != NULL; sel++)
3206 {
3207 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3208 /* An exact match? */
3209 if (bitmap_equal_p (default_isa, candidate_isa))
3210 break;
3211 }
3212
3213 if (sel->common.name == NULL)
3214 {
3215 unsigned current_bit_count = isa_num_bits;
3216 const cpu_option *best_fit = NULL;
3217
3218 /* Ideally we would like to issue an error message here
3219 saying that it was not possible to find a CPU compatible
3220 with the default CPU, but which also supports the command
3221 line options specified by the programmer, and so they
3222 ought to use the -mcpu=<name> command line option to
3223 override the default CPU type.
3224
3225 If we cannot find a CPU that has exactly the
3226 characteristics of the default CPU and the given
3227 command line options we scan the array again looking
3228 for a best match. The best match must have at least
3229 the capabilities of the perfect match. */
3230 for (sel = all_cores; sel->common.name != NULL; sel++)
3231 {
3232 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3233
3234 if (bitmap_subset_p (default_isa, candidate_isa))
3235 {
3236 unsigned count;
3237
3238 bitmap_and_compl (candidate_isa, candidate_isa,
3239 default_isa);
3240 count = bitmap_popcount (candidate_isa);
3241
3242 if (count < current_bit_count)
3243 {
3244 best_fit = sel;
3245 current_bit_count = count;
3246 }
3247 }
3248
3249 gcc_assert (best_fit);
3250 sel = best_fit;
3251 }
3252 }
3253 arm_selected_cpu = sel;
3254 }
3255
3256 /* Now we know the CPU, we can finally initialize the target
3257 structure. */
3258 target->core_name = arm_selected_cpu->common.name;
3259 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3260 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3261 cpu_opts);
3262 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3263 }
3264
3265 gcc_assert (arm_selected_cpu);
3266 gcc_assert (arm_selected_arch);
3267
3268 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3269 {
3270 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3271 auto_sbitmap fpu_bits (isa_num_bits);
3272
3273 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3274 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3275 bitmap_ior (target->isa, target->isa, fpu_bits);
3276 }
3277
3278 if (!arm_selected_tune)
3279 arm_selected_tune = arm_selected_cpu;
3280 else /* Validate the features passed to -mtune. */
3281 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3282
3283 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3284
3285 /* Finish initializing the target structure. */
3286 target->arch_pp_name = arm_selected_arch->arch;
3287 target->base_arch = arm_selected_arch->base_arch;
3288 target->profile = arm_selected_arch->profile;
3289
3290 target->tune_flags = tune_data->tune_flags;
3291 target->tune = tune_data->tune;
3292 target->tune_core = tune_data->scheduler;
3293 }
3294
3295 /* Fix up any incompatible options that the user has specified. */
3296 static void
3297 arm_option_override (void)
3298 {
3299 static const enum isa_feature fpu_bitlist[]
3300 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3301 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3302 cl_target_option opts;
3303
3304 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3305 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3306
3307 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3308 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3309
3310 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3311
3312 if (!global_options_set.x_arm_fpu_index)
3313 {
3314 bool ok;
3315 int fpu_index;
3316
3317 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3318 CL_TARGET);
3319 gcc_assert (ok);
3320 arm_fpu_index = (enum fpu_type) fpu_index;
3321 }
3322
3323 cl_target_option_save (&opts, &global_options);
3324 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3325 true);
3326
3327 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3328 SUBTARGET_OVERRIDE_OPTIONS;
3329 #endif
3330
3331 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3332 arm_base_arch = arm_active_target.base_arch;
3333
3334 arm_tune = arm_active_target.tune_core;
3335 tune_flags = arm_active_target.tune_flags;
3336 current_tune = arm_active_target.tune;
3337
3338 /* TBD: Dwarf info for apcs frame is not handled yet. */
3339 if (TARGET_APCS_FRAME)
3340 flag_shrink_wrap = false;
3341
3342 /* BPABI targets use linker tricks to allow interworking on cores
3343 without thumb support. */
3344 if (TARGET_INTERWORK
3345 && !TARGET_BPABI
3346 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3347 {
3348 warning (0, "target CPU does not support interworking" );
3349 target_flags &= ~MASK_INTERWORK;
3350 }
3351
3352 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3353 {
3354 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3355 target_flags |= MASK_APCS_FRAME;
3356 }
3357
3358 if (TARGET_POKE_FUNCTION_NAME)
3359 target_flags |= MASK_APCS_FRAME;
3360
3361 if (TARGET_APCS_REENT && flag_pic)
3362 error ("-fpic and -mapcs-reent are incompatible");
3363
3364 if (TARGET_APCS_REENT)
3365 warning (0, "APCS reentrant code not supported. Ignored");
3366
3367 /* Initialize boolean versions of the architectural flags, for use
3368 in the arm.md file. */
3369 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3370 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3371 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3372 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3373 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3374 arm_arch5te = arm_arch5e
3375 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3376 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3377 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3378 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3379 arm_arch6m = arm_arch6 && !arm_arch_notm;
3380 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3381 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3382 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3383 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3384 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3385 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3386 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3387 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3388 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3389 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3390 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3391 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3392 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3393 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3394 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3395 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3396 if (arm_fp16_inst)
3397 {
3398 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3399 error ("selected fp16 options are incompatible");
3400 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3401 }
3402
3403
3404 /* Set up some tuning parameters. */
3405 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3406 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3407 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3408 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3409 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3410 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3411
3412 /* And finally, set up some quirks. */
3413 arm_arch_no_volatile_ce
3414 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3415 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3416 isa_bit_quirk_armv6kz);
3417
3418 /* V5 code we generate is completely interworking capable, so we turn off
3419 TARGET_INTERWORK here to avoid many tests later on. */
3420
3421 /* XXX However, we must pass the right pre-processor defines to CPP
3422 or GLD can get confused. This is a hack. */
3423 if (TARGET_INTERWORK)
3424 arm_cpp_interwork = 1;
3425
3426 if (arm_arch5)
3427 target_flags &= ~MASK_INTERWORK;
3428
3429 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3430 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3431
3432 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3433 error ("iwmmxt abi requires an iwmmxt capable cpu");
3434
3435 /* If soft-float is specified then don't use FPU. */
3436 if (TARGET_SOFT_FLOAT)
3437 arm_fpu_attr = FPU_NONE;
3438 else
3439 arm_fpu_attr = FPU_VFP;
3440
3441 if (TARGET_AAPCS_BASED)
3442 {
3443 if (TARGET_CALLER_INTERWORKING)
3444 error ("AAPCS does not support -mcaller-super-interworking");
3445 else
3446 if (TARGET_CALLEE_INTERWORKING)
3447 error ("AAPCS does not support -mcallee-super-interworking");
3448 }
3449
3450 /* __fp16 support currently assumes the core has ldrh. */
3451 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3452 sorry ("__fp16 and no ldrh");
3453
3454 if (TARGET_AAPCS_BASED)
3455 {
3456 if (arm_abi == ARM_ABI_IWMMXT)
3457 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3458 else if (TARGET_HARD_FLOAT_ABI)
3459 {
3460 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3461 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3462 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3463 }
3464 else
3465 arm_pcs_default = ARM_PCS_AAPCS;
3466 }
3467 else
3468 {
3469 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3470 sorry ("-mfloat-abi=hard and VFP");
3471
3472 if (arm_abi == ARM_ABI_APCS)
3473 arm_pcs_default = ARM_PCS_APCS;
3474 else
3475 arm_pcs_default = ARM_PCS_ATPCS;
3476 }
3477
3478 /* For arm2/3 there is no need to do any scheduling if we are doing
3479 software floating-point. */
3480 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3481 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3482
3483 /* Use the cp15 method if it is available. */
3484 if (target_thread_pointer == TP_AUTO)
3485 {
3486 if (arm_arch6k && !TARGET_THUMB1)
3487 target_thread_pointer = TP_CP15;
3488 else
3489 target_thread_pointer = TP_SOFT;
3490 }
3491
3492 /* Override the default structure alignment for AAPCS ABI. */
3493 if (!global_options_set.x_arm_structure_size_boundary)
3494 {
3495 if (TARGET_AAPCS_BASED)
3496 arm_structure_size_boundary = 8;
3497 }
3498 else
3499 {
3500 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3501
3502 if (arm_structure_size_boundary != 8
3503 && arm_structure_size_boundary != 32
3504 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3505 {
3506 if (ARM_DOUBLEWORD_ALIGN)
3507 warning (0,
3508 "structure size boundary can only be set to 8, 32 or 64");
3509 else
3510 warning (0, "structure size boundary can only be set to 8 or 32");
3511 arm_structure_size_boundary
3512 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3513 }
3514 }
3515
3516 if (TARGET_VXWORKS_RTP)
3517 {
3518 if (!global_options_set.x_arm_pic_data_is_text_relative)
3519 arm_pic_data_is_text_relative = 0;
3520 }
3521 else if (flag_pic
3522 && !arm_pic_data_is_text_relative
3523 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3524 /* When text & data segments don't have a fixed displacement, the
3525 intended use is with a single, read only, pic base register.
3526 Unless the user explicitly requested not to do that, set
3527 it. */
3528 target_flags |= MASK_SINGLE_PIC_BASE;
3529
3530 /* If stack checking is disabled, we can use r10 as the PIC register,
3531 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3532 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3533 {
3534 if (TARGET_VXWORKS_RTP)
3535 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3536 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3537 }
3538
3539 if (flag_pic && TARGET_VXWORKS_RTP)
3540 arm_pic_register = 9;
3541
3542 if (arm_pic_register_string != NULL)
3543 {
3544 int pic_register = decode_reg_name (arm_pic_register_string);
3545
3546 if (!flag_pic)
3547 warning (0, "-mpic-register= is useless without -fpic");
3548
3549 /* Prevent the user from choosing an obviously stupid PIC register. */
3550 else if (pic_register < 0 || call_used_regs[pic_register]
3551 || pic_register == HARD_FRAME_POINTER_REGNUM
3552 || pic_register == STACK_POINTER_REGNUM
3553 || pic_register >= PC_REGNUM
3554 || (TARGET_VXWORKS_RTP
3555 && (unsigned int) pic_register != arm_pic_register))
3556 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3557 else
3558 arm_pic_register = pic_register;
3559 }
3560
3561 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3562 if (fix_cm3_ldrd == 2)
3563 {
3564 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3565 fix_cm3_ldrd = 1;
3566 else
3567 fix_cm3_ldrd = 0;
3568 }
3569
3570 /* Hot/Cold partitioning is not currently supported, since we can't
3571 handle literal pool placement in that case. */
3572 if (flag_reorder_blocks_and_partition)
3573 {
3574 inform (input_location,
3575 "-freorder-blocks-and-partition not supported on this architecture");
3576 flag_reorder_blocks_and_partition = 0;
3577 flag_reorder_blocks = 1;
3578 }
3579
3580 if (flag_pic)
3581 /* Hoisting PIC address calculations more aggressively provides a small,
3582 but measurable, size reduction for PIC code. Therefore, we decrease
3583 the bar for unrestricted expression hoisting to the cost of PIC address
3584 calculation, which is 2 instructions. */
3585 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3586 global_options.x_param_values,
3587 global_options_set.x_param_values);
3588
3589 /* ARM EABI defaults to strict volatile bitfields. */
3590 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3591 && abi_version_at_least(2))
3592 flag_strict_volatile_bitfields = 1;
3593
3594 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3595 for which we have deemed it beneficial (signified by setting
3596 prefetch.num_slots to 1 or more). */
3597 if (flag_prefetch_loop_arrays < 0
3598 && HAVE_prefetch
3599 && optimize >= 3
3600 && current_tune->prefetch.num_slots > 0)
3601 flag_prefetch_loop_arrays = 1;
3602
3603 /* Set up the parameters to be used in the prefetching algorithm. Do not
3604 override the defaults unless we are tuning for a core we have
3605 researched values for. */
3606 if (current_tune->prefetch.num_slots > 0)
3607 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3608 current_tune->prefetch.num_slots,
3609 global_options.x_param_values,
3610 global_options_set.x_param_values);
3611 if (current_tune->prefetch.l1_cache_line_size >= 0)
3612 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3613 current_tune->prefetch.l1_cache_line_size,
3614 global_options.x_param_values,
3615 global_options_set.x_param_values);
3616 if (current_tune->prefetch.l1_cache_size >= 0)
3617 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3618 current_tune->prefetch.l1_cache_size,
3619 global_options.x_param_values,
3620 global_options_set.x_param_values);
3621
3622 /* Use Neon to perform 64-bit operations rather than core
3623 registers. */
3624 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3625 if (use_neon_for_64bits == 1)
3626 prefer_neon_for_64bits = true;
3627
3628 /* Use the alternative scheduling-pressure algorithm by default. */
3629 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3630 global_options.x_param_values,
3631 global_options_set.x_param_values);
3632
3633 /* Look through ready list and all of queue for instructions
3634 relevant for L2 auto-prefetcher. */
3635 int param_sched_autopref_queue_depth;
3636
3637 switch (current_tune->sched_autopref)
3638 {
3639 case tune_params::SCHED_AUTOPREF_OFF:
3640 param_sched_autopref_queue_depth = -1;
3641 break;
3642
3643 case tune_params::SCHED_AUTOPREF_RANK:
3644 param_sched_autopref_queue_depth = 0;
3645 break;
3646
3647 case tune_params::SCHED_AUTOPREF_FULL:
3648 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3649 break;
3650
3651 default:
3652 gcc_unreachable ();
3653 }
3654
3655 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3656 param_sched_autopref_queue_depth,
3657 global_options.x_param_values,
3658 global_options_set.x_param_values);
3659
3660 /* Currently, for slow flash data, we just disable literal pools. We also
3661 disable them for pure-code. */
3662 if (target_slow_flash_data || target_pure_code)
3663 arm_disable_literal_pool = true;
3664
3665 if (use_cmse && !arm_arch_cmse)
3666 error ("target CPU does not support ARMv8-M Security Extensions");
3667
3668 /* Disable scheduling fusion by default if the target is not an ARMv7
3669 processor or does not prefer ldrd/strd. */
3670 if (flag_schedule_fusion == 2
3671 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3672 flag_schedule_fusion = 0;
3673
3674 /* Need to remember initial options before they are overridden. */
3675 init_optimize = build_optimization_node (&global_options);
3676
3677 arm_option_override_internal (&global_options, &global_options_set);
3678 arm_option_check_internal (&global_options);
3679 arm_option_params_internal ();
3680
3681 /* Create the default target_options structure. */
3682 target_option_default_node = target_option_current_node
3683 = build_target_option_node (&global_options);
3684
3685 /* Register global variables with the garbage collector. */
3686 arm_add_gc_roots ();
3687
3688 /* Record the initial instruction-set mode (ARM or Thumb) for testing. */
3689 thumb_flipper = TARGET_THUMB;
3690 }
3691
3692 static void
3693 arm_add_gc_roots (void)
3694 {
3695 gcc_obstack_init(&minipool_obstack);
3696 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3697 }
3698 \f
3699 /* A table of known ARM exception types.
3700 For use with the interrupt function attribute. */
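/* For illustration, a handler is typically declared as, e.g.:
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   where the string argument is matched against the "arg" field of the
   table below.  */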
3701
3702 typedef struct
3703 {
3704 const char *const arg;
3705 const unsigned long return_value;
3706 }
3707 isr_attribute_arg;
3708
3709 static const isr_attribute_arg isr_attribute_args [] =
3710 {
3711 { "IRQ", ARM_FT_ISR },
3712 { "irq", ARM_FT_ISR },
3713 { "FIQ", ARM_FT_FIQ },
3714 { "fiq", ARM_FT_FIQ },
3715 { "ABORT", ARM_FT_ISR },
3716 { "abort", ARM_FT_ISR },
3717 { "ABORT", ARM_FT_ISR },
3718 { "abort", ARM_FT_ISR },
3719 { "UNDEF", ARM_FT_EXCEPTION },
3720 { "undef", ARM_FT_EXCEPTION },
3721 { "SWI", ARM_FT_EXCEPTION },
3722 { "swi", ARM_FT_EXCEPTION },
3723 { NULL, ARM_FT_NORMAL }
3724 };
3725
3726 /* Returns the (interrupt) function type of the current
3727 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3728
3729 static unsigned long
3730 arm_isr_value (tree argument)
3731 {
3732 const isr_attribute_arg * ptr;
3733 const char * arg;
3734
3735 if (!arm_arch_notm)
3736 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3737
3738 /* No argument - default to IRQ. */
3739 if (argument == NULL_TREE)
3740 return ARM_FT_ISR;
3741
3742 /* Get the value of the argument. */
3743 if (TREE_VALUE (argument) == NULL_TREE
3744 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3745 return ARM_FT_UNKNOWN;
3746
3747 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3748
3749 /* Check it against the list of known arguments. */
3750 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3751 if (streq (arg, ptr->arg))
3752 return ptr->return_value;
3753
3754 /* An unrecognized interrupt type. */
3755 return ARM_FT_UNKNOWN;
3756 }
3757
3758 /* Computes the type of the current function. */
3759
3760 static unsigned long
3761 arm_compute_func_type (void)
3762 {
3763 unsigned long type = ARM_FT_UNKNOWN;
3764 tree a;
3765 tree attr;
3766
3767 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3768
3769 /* Decide if the current function is volatile. Such functions
3770 never return, and many memory cycles can be saved by not storing
3771 register values that will never be needed again. This optimization
3772 was added to speed up context switching in a kernel application. */
3773 if (optimize > 0
3774 && (TREE_NOTHROW (current_function_decl)
3775 || !(flag_unwind_tables
3776 || (flag_exceptions
3777 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3778 && TREE_THIS_VOLATILE (current_function_decl))
3779 type |= ARM_FT_VOLATILE;
3780
3781 if (cfun->static_chain_decl != NULL)
3782 type |= ARM_FT_NESTED;
3783
3784 attr = DECL_ATTRIBUTES (current_function_decl);
3785
3786 a = lookup_attribute ("naked", attr);
3787 if (a != NULL_TREE)
3788 type |= ARM_FT_NAKED;
3789
3790 a = lookup_attribute ("isr", attr);
3791 if (a == NULL_TREE)
3792 a = lookup_attribute ("interrupt", attr);
3793
3794 if (a == NULL_TREE)
3795 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3796 else
3797 type |= arm_isr_value (TREE_VALUE (a));
3798
3799 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3800 type |= ARM_FT_CMSE_ENTRY;
3801
3802 return type;
3803 }
3804
3805 /* Returns the type of the current function. */
3806
3807 unsigned long
3808 arm_current_func_type (void)
3809 {
3810 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3811 cfun->machine->func_type = arm_compute_func_type ();
3812
3813 return cfun->machine->func_type;
3814 }
3815
3816 bool
3817 arm_allocate_stack_slots_for_args (void)
3818 {
3819 /* Naked functions should not allocate stack slots for arguments. */
3820 return !IS_NAKED (arm_current_func_type ());
3821 }
3822
3823 static bool
3824 arm_warn_func_return (tree decl)
3825 {
3826 /* Naked functions are implemented entirely in assembly, including the
3827 return sequence, so suppress warnings about this. */
3828 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3829 }
3830
3831 \f
3832 /* Output assembler code for a block containing the constant parts
3833 of a trampoline, leaving space for the variable parts.
3834
3835 On the ARM, (if r8 is the static chain regnum, and remembering that
3836 referencing pc adds an offset of 8) the trampoline looks like:
3837 ldr r8, [pc, #0]
3838 ldr pc, [pc]
3839 .word static chain value
3840 .word function's address
3841 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3842
3843 static void
3844 arm_asm_trampoline_template (FILE *f)
3845 {
3846 fprintf (f, "\t.syntax unified\n");
3847
3848 if (TARGET_ARM)
3849 {
3850 fprintf (f, "\t.arm\n");
3851 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3852 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3853 }
3854 else if (TARGET_THUMB2)
3855 {
3856 fprintf (f, "\t.thumb\n");
3857 /* The Thumb-2 trampoline is similar to the arm implementation.
3858 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3859 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3860 STATIC_CHAIN_REGNUM, PC_REGNUM);
3861 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3862 }
3863 else
3864 {
3865 ASM_OUTPUT_ALIGN (f, 2);
3866 fprintf (f, "\t.code\t16\n");
3867 fprintf (f, ".Ltrampoline_start:\n");
3868 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3869 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3870 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3871 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3872 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3873 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3874 }
3875 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3876 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3877 }
3878
3879 /* Emit RTL insns to initialize the variable parts of a trampoline. */
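/* Roughly: for the 32-bit (ARM/Thumb-2) templates above, the two .word
   slots sit at byte offsets 8 and 12, so that is where the static chain
   value and the function address are written; for the 16-bit Thumb
   template they sit at offsets 12 and 16 instead.  */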
3880
3881 static void
3882 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3883 {
3884 rtx fnaddr, mem, a_tramp;
3885
3886 emit_block_move (m_tramp, assemble_trampoline_template (),
3887 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3888
3889 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3890 emit_move_insn (mem, chain_value);
3891
3892 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3893 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3894 emit_move_insn (mem, fnaddr);
3895
3896 a_tramp = XEXP (m_tramp, 0);
3897 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3898 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3899 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3900 }
3901
3902 /* Thumb trampolines should be entered in thumb mode, so set
3903 the bottom bit of the address. */
3904
3905 static rtx
3906 arm_trampoline_adjust_address (rtx addr)
3907 {
3908 if (TARGET_THUMB)
3909 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3910 NULL, 0, OPTAB_LIB_WIDEN);
3911 return addr;
3912 }
3913 \f
3914 /* Return 1 if it is possible to return using a single instruction.
3915 If SIBLING is non-null, this is a test for a return before a sibling
3916 call. SIBLING is the call insn, so we can examine its register usage. */
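/* A single-instruction return here means, for example, a plain "bx lr"
   when nothing was saved, or one "ldmfd sp!, {..., pc}" that restores
   the saved registers and reloads the PC in a single go.  */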
3917
3918 int
3919 use_return_insn (int iscond, rtx sibling)
3920 {
3921 int regno;
3922 unsigned int func_type;
3923 unsigned long saved_int_regs;
3924 unsigned HOST_WIDE_INT stack_adjust;
3925 arm_stack_offsets *offsets;
3926
3927 /* Never use a return instruction before reload has run. */
3928 if (!reload_completed)
3929 return 0;
3930
3931 func_type = arm_current_func_type ();
3932
3933 /* Naked, volatile and stack alignment functions need special
3934 consideration. */
3935 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3936 return 0;
3937
3938 /* So do interrupt functions that use the frame pointer and Thumb
3939 interrupt functions. */
3940 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3941 return 0;
3942
3943 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3944 && !optimize_function_for_size_p (cfun))
3945 return 0;
3946
3947 offsets = arm_get_frame_offsets ();
3948 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3949
3950 /* As do variadic functions. */
3951 if (crtl->args.pretend_args_size
3952 || cfun->machine->uses_anonymous_args
3953 /* Or if the function calls __builtin_eh_return () */
3954 || crtl->calls_eh_return
3955 /* Or if the function calls alloca */
3956 || cfun->calls_alloca
3957 /* Or if there is a stack adjustment. However, if the stack pointer
3958 is saved on the stack, we can use a pre-incrementing stack load. */
3959 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3960 && stack_adjust == 4))
3961 /* Or if the static chain register was saved above the frame, under the
3962 assumption that the stack pointer isn't saved on the stack. */
3963 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3964 && arm_compute_static_chain_stack_bytes() != 0))
3965 return 0;
3966
3967 saved_int_regs = offsets->saved_regs_mask;
3968
3969 /* Unfortunately, the insn
3970
3971 ldmib sp, {..., sp, ...}
3972
3973 triggers a bug on most SA-110 based devices, such that the stack
3974 pointer won't be correctly restored if the instruction takes a
3975 page fault. We work around this problem by popping r3 along with
3976 the other registers, since that is never slower than executing
3977 another instruction.
3978
3979 We test for !arm_arch5 here, because code for any architecture
3980 less than this could potentially be run on one of the buggy
3981 chips. */
3982 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3983 {
3984 /* Validate that r3 is a call-clobbered register (always true in
3985 the default ABI) ... */
3986 if (!call_used_regs[3])
3987 return 0;
3988
3989 /* ... that it isn't being used for a return value ... */
3990 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3991 return 0;
3992
3993 /* ... or for a tail-call argument ... */
3994 if (sibling)
3995 {
3996 gcc_assert (CALL_P (sibling));
3997
3998 if (find_regno_fusage (sibling, USE, 3))
3999 return 0;
4000 }
4001
4002 /* ... and that there are no call-saved registers in r0-r2
4003 (always true in the default ABI). */
4004 if (saved_int_regs & 0x7)
4005 return 0;
4006 }
4007
4008 /* Can't be done if interworking with Thumb, and any registers have been
4009 stacked. */
4010 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4011 return 0;
4012
4013 /* On StrongARM, conditional returns are expensive if they aren't
4014 taken and multiple registers have been stacked. */
4015 if (iscond && arm_tune_strongarm)
4016 {
4017 /* Conditional return when just the LR is stored is a simple
4018 conditional-load instruction, that's not expensive. */
4019 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4020 return 0;
4021
4022 if (flag_pic
4023 && arm_pic_register != INVALID_REGNUM
4024 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4025 return 0;
4026 }
4027
4028 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4029 several instructions if anything needs to be popped. */
4030 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4031 return 0;
4032
4033 /* If there are saved registers but the LR isn't saved, then we need
4034 two instructions for the return. */
4035 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4036 return 0;
4037
4038 /* Can't be done if any of the VFP regs are pushed,
4039 since this also requires an insn. */
4040 if (TARGET_HARD_FLOAT)
4041 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4042 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4043 return 0;
4044
4045 if (TARGET_REALLY_IWMMXT)
4046 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4047 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4048 return 0;
4049
4050 return 1;
4051 }
4052
4053 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4054 shrink-wrapping if possible. This is the case if we need to emit a
4055 prologue, which we can test by looking at the offsets. */
4056 bool
4057 use_simple_return_p (void)
4058 {
4059 arm_stack_offsets *offsets;
4060
4061 /* Note this function can be called before or after reload. */
4062 if (!reload_completed)
4063 arm_compute_frame_layout ();
4064
4065 offsets = arm_get_frame_offsets ();
4066 return offsets->outgoing_args != 0;
4067 }
4068
4069 /* Return TRUE if int I is a valid immediate ARM constant. */
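/* As a rough illustration: in ARM state a valid immediate is an 8-bit
   value rotated right by an even amount, so 0x000000ff, 0xff000000
   (0xff rotated right by 8) and 0x00ab0000 are all representable,
   while 0x00001234 is not, since its significant bits span more than
   eight bit positions.  */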
4070
4071 int
4072 const_ok_for_arm (HOST_WIDE_INT i)
4073 {
4074 int lowbit;
4075
4076 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4077 be all zero, or all one. */
4078 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4079 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4080 != ((~(unsigned HOST_WIDE_INT) 0)
4081 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4082 return FALSE;
4083
4084 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4085
4086 /* Fast return for 0 and small values. We must do this for zero, since
4087 the code below can't handle that one case. */
4088 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4089 return TRUE;
4090
4091 /* Get the number of trailing zeros. */
4092 lowbit = ffs((int) i) - 1;
4093
4094 /* Only even shifts are allowed in ARM mode so round down to the
4095 nearest even number. */
4096 if (TARGET_ARM)
4097 lowbit &= ~1;
4098
4099 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4100 return TRUE;
4101
4102 if (TARGET_ARM)
4103 {
4104 /* Allow rotated constants in ARM mode. */
4105 if (lowbit <= 4
4106 && ((i & ~0xc000003f) == 0
4107 || (i & ~0xf000000f) == 0
4108 || (i & ~0xfc000003) == 0))
4109 return TRUE;
4110 }
4111 else if (TARGET_THUMB2)
4112 {
4113 HOST_WIDE_INT v;
4114
4115 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4116 v = i & 0xff;
4117 v |= v << 16;
4118 if (i == v || i == (v | (v << 8)))
4119 return TRUE;
4120
4121 /* Allow repeated pattern 0xXY00XY00. */
4122 v = i & 0xff00;
4123 v |= v << 16;
4124 if (i == v)
4125 return TRUE;
4126 }
4127 else if (TARGET_HAVE_MOVT)
4128 {
4129 /* Thumb-1 targets with MOVT. */
4130 if (i > 0xffff)
4131 return FALSE;
4132 else
4133 return TRUE;
4134 }
4135
4136 return FALSE;
4137 }
4138
4139 /* Return true if I is a valid constant for the operation CODE. */
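/* E.g. for PLUS, a constant such as -1 that is not itself a valid
   immediate is still acceptable because its negation (1) is, and the
   addition can then be emitted as a subtraction; AND similarly falls
   back to BIC with the inverted constant.  */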
4140 int
4141 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4142 {
4143 if (const_ok_for_arm (i))
4144 return 1;
4145
4146 switch (code)
4147 {
4148 case SET:
4149 /* See if we can use movw. */
4150 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4151 return 1;
4152 else
4153 /* Otherwise, try mvn. */
4154 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4155
4156 case PLUS:
4157 /* See if we can use addw or subw. */
4158 if (TARGET_THUMB2
4159 && ((i & 0xfffff000) == 0
4160 || ((-i) & 0xfffff000) == 0))
4161 return 1;
4162 /* Fall through. */
4163 case COMPARE:
4164 case EQ:
4165 case NE:
4166 case GT:
4167 case LE:
4168 case LT:
4169 case GE:
4170 case GEU:
4171 case LTU:
4172 case GTU:
4173 case LEU:
4174 case UNORDERED:
4175 case ORDERED:
4176 case UNEQ:
4177 case UNGE:
4178 case UNLT:
4179 case UNGT:
4180 case UNLE:
4181 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4182
4183 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4184 case XOR:
4185 return 0;
4186
4187 case IOR:
4188 if (TARGET_THUMB2)
4189 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4190 return 0;
4191
4192 case AND:
4193 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4194
4195 default:
4196 gcc_unreachable ();
4197 }
4198 }
4199
4200 /* Return true if I is a valid di mode constant for the operation CODE. */
4201 int
4202 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4203 {
4204 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4205 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4206 rtx hi = GEN_INT (hi_val);
4207 rtx lo = GEN_INT (lo_val);
4208
4209 if (TARGET_THUMB1)
4210 return 0;
4211
4212 switch (code)
4213 {
4214 case AND:
4215 case IOR:
4216 case XOR:
4217 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4218 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4219 case PLUS:
4220 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4221
4222 default:
4223 return 0;
4224 }
4225 }
4226
4227 /* Emit a sequence of insns to handle a large constant.
4228 CODE is the code of the operation required, it can be any of SET, PLUS,
4229 IOR, AND, XOR, MINUS;
4230 MODE is the mode in which the operation is being performed;
4231 VAL is the integer to operate on;
4232 SOURCE is the other operand (a register, or a null-pointer for SET);
4233 SUBTARGETS means it is safe to create scratch registers if that will
4234 either produce a simpler sequence, or we will want to cse the values.
4235 Return value is the number of insns emitted. */
4236
4237 /* ??? Tweak this for thumb2. */
4238 int
4239 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4240 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4241 {
4242 rtx cond;
4243
4244 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4245 cond = COND_EXEC_TEST (PATTERN (insn));
4246 else
4247 cond = NULL_RTX;
4248
4249 if (subtargets || code == SET
4250 || (REG_P (target) && REG_P (source)
4251 && REGNO (target) != REGNO (source)))
4252 {
4253 /* After arm_reorg has been called, we can't fix up expensive
4254 constants by pushing them into memory so we must synthesize
4255 them in-line, regardless of the cost. This is only likely to
4256 be more costly on chips that have load delay slots and we are
4257 compiling without running the scheduler (so no splitting
4258 occurred before the final instruction emission).
4259
4260 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4261 */
4262 if (!cfun->machine->after_arm_reorg
4263 && !cond
4264 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4265 1, 0)
4266 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4267 + (code != SET))))
4268 {
4269 if (code == SET)
4270 {
4271 /* Currently SET is the only monadic value for CODE, all
4272 the rest are dyadic. */
4273 if (TARGET_USE_MOVT)
4274 arm_emit_movpair (target, GEN_INT (val));
4275 else
4276 emit_set_insn (target, GEN_INT (val));
4277
4278 return 1;
4279 }
4280 else
4281 {
4282 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4283
4284 if (TARGET_USE_MOVT)
4285 arm_emit_movpair (temp, GEN_INT (val));
4286 else
4287 emit_set_insn (temp, GEN_INT (val));
4288
4289 /* For MINUS, the value is subtracted from, since we never
4290 have subtraction of a constant. */
4291 if (code == MINUS)
4292 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4293 else
4294 emit_set_insn (target,
4295 gen_rtx_fmt_ee (code, mode, source, temp));
4296 return 2;
4297 }
4298 }
4299 }
4300
4301 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4302 1);
4303 }
4304
4305 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4306 ARM/Thumb-2 immediates and add up to VAL.
4307 The function return value gives the number of insns required. */
4308 static int
4309 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4310 struct four_ints *return_sequence)
4311 {
4312 int best_consecutive_zeros = 0;
4313 int i;
4314 int best_start = 0;
4315 int insns1, insns2;
4316 struct four_ints tmp_sequence;
4317
4318 /* If we aren't targeting ARM, the best place to start is always at
4319 the bottom, otherwise look more closely. */
4320 if (TARGET_ARM)
4321 {
4322 for (i = 0; i < 32; i += 2)
4323 {
4324 int consecutive_zeros = 0;
4325
4326 if (!(val & (3 << i)))
4327 {
4328 while ((i < 32) && !(val & (3 << i)))
4329 {
4330 consecutive_zeros += 2;
4331 i += 2;
4332 }
4333 if (consecutive_zeros > best_consecutive_zeros)
4334 {
4335 best_consecutive_zeros = consecutive_zeros;
4336 best_start = i - consecutive_zeros;
4337 }
4338 i -= 2;
4339 }
4340 }
4341 }
4342
4343 /* So long as it won't require any more insns to do so, it's
4344 desirable to emit a small constant (in bits 0...9) in the last
4345 insn. This way there is more chance that it can be combined with
4346 a later addressing insn to form a pre-indexed load or store
4347 operation. Consider:
4348
4349 *((volatile int *)0xe0000100) = 1;
4350 *((volatile int *)0xe0000110) = 2;
4351
4352 We want this to wind up as:
4353
4354 mov rA, #0xe0000000
4355 mov rB, #1
4356 str rB, [rA, #0x100]
4357 mov rB, #2
4358 str rB, [rA, #0x110]
4359
4360 rather than having to synthesize both large constants from scratch.
4361
4362 Therefore, we calculate how many insns would be required to emit
4363 the constant starting from `best_start', and also starting from
4364 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4365 yield a shorter sequence, we may as well use zero. */
4366 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4367 if (best_start != 0
4368 && ((HOST_WIDE_INT_1U << best_start) < val))
4369 {
4370 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4371 if (insns2 <= insns1)
4372 {
4373 *return_sequence = tmp_sequence;
4374 insns1 = insns2;
4375 }
4376 }
4377
4378 return insns1;
4379 }
4380
4381 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4382 static int
4383 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4384 struct four_ints *return_sequence, int i)
4385 {
4386 int remainder = val & 0xffffffff;
4387 int insns = 0;
4388
4389 /* Try and find a way of doing the job in either two or three
4390 instructions.
4391
4392 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4393 location. We start at position I. This may be the MSB, or
4394 optimal_immediate_sequence may have positioned it at the largest block
4395 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4396 wrapping around to the top of the word when we drop off the bottom.
4397 In the worst case this code should produce no more than four insns.
4398
4399 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4400 constants, shifted to any arbitrary location. We should always start
4401 at the MSB. */
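/* Illustrative example: 0x00ff00ff needs two 8-bit rotated immediates
   in ARM state (0x000000ff and 0x00ff0000), whereas in Thumb-2 it
   matches the replicated 0x00XY00XY form and is already handled by a
   single instruction before this function is reached.  */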
4402 do
4403 {
4404 int end;
4405 unsigned int b1, b2, b3, b4;
4406 unsigned HOST_WIDE_INT result;
4407 int loc;
4408
4409 gcc_assert (insns < 4);
4410
4411 if (i <= 0)
4412 i += 32;
4413
4414 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4415 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4416 {
4417 loc = i;
4418 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4419 /* We can use addw/subw for the last 12 bits. */
4420 result = remainder;
4421 else
4422 {
4423 /* Use an 8-bit shifted/rotated immediate. */
4424 end = i - 8;
4425 if (end < 0)
4426 end += 32;
4427 result = remainder & ((0x0ff << end)
4428 | ((i < end) ? (0xff >> (32 - end))
4429 : 0));
4430 i -= 8;
4431 }
4432 }
4433 else
4434 {
4435 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4436 arbitrary shifts. */
4437 i -= TARGET_ARM ? 2 : 1;
4438 continue;
4439 }
4440
4441 /* Next, see if we can do a better job with a thumb2 replicated
4442 constant.
4443
4444 We do it this way around to catch the cases like 0x01F001E0 where
4445 two 8-bit immediates would work, but a replicated constant would
4446 make it worse.
4447
4448 TODO: 16-bit constants that don't clear all the bits, but still win.
4449 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4450 if (TARGET_THUMB2)
4451 {
4452 b1 = (remainder & 0xff000000) >> 24;
4453 b2 = (remainder & 0x00ff0000) >> 16;
4454 b3 = (remainder & 0x0000ff00) >> 8;
4455 b4 = remainder & 0xff;
4456
4457 if (loc > 24)
4458 {
4459 /* The 8-bit immediate already found clears b1 (and maybe b2),
4460 but must leave b3 and b4 alone. */
4461
4462 /* First try to find a 32-bit replicated constant that clears
4463 almost everything. We can assume that we can't do it in one,
4464 or else we wouldn't be here. */
4465 unsigned int tmp = b1 & b2 & b3 & b4;
4466 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4467 + (tmp << 24);
4468 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4469 + (tmp == b3) + (tmp == b4);
4470 if (tmp
4471 && (matching_bytes >= 3
4472 || (matching_bytes == 2
4473 && const_ok_for_op (remainder & ~tmp2, code))))
4474 {
4475 /* At least 3 of the bytes match, and the fourth has at
4476 least as many bits set, or two of the bytes match
4477 and it will only require one more insn to finish. */
4478 result = tmp2;
4479 i = tmp != b1 ? 32
4480 : tmp != b2 ? 24
4481 : tmp != b3 ? 16
4482 : 8;
4483 }
4484
4485 /* Second, try to find a 16-bit replicated constant that can
4486 leave three of the bytes clear. If b2 or b4 is already
4487 zero, then we can. If the 8-bit from above would not
4488 clear b2 anyway, then we still win. */
4489 else if (b1 == b3 && (!b2 || !b4
4490 || (remainder & 0x00ff0000 & ~result)))
4491 {
4492 result = remainder & 0xff00ff00;
4493 i = 24;
4494 }
4495 }
4496 else if (loc > 16)
4497 {
4498 /* The 8-bit immediate already found clears b2 (and maybe b3)
4499 and we don't get here unless b1 is already clear, but it will
4500 leave b4 unchanged. */
4501
4502 /* If we can clear b2 and b4 at once, then we win, since the
4503 8-bit immediate couldn't possibly reach that far. */
4504 if (b2 == b4)
4505 {
4506 result = remainder & 0x00ff00ff;
4507 i = 16;
4508 }
4509 }
4510 }
4511
4512 return_sequence->i[insns++] = result;
4513 remainder &= ~result;
4514
4515 if (code == SET || code == MINUS)
4516 code = PLUS;
4517 }
4518 while (remainder);
4519
4520 return insns;
4521 }
4522
4523 /* Emit an instruction with the indicated PATTERN. If COND is
4524 non-NULL, conditionalize the execution of the instruction on COND
4525 being true. */
4526
4527 static void
4528 emit_constant_insn (rtx cond, rtx pattern)
4529 {
4530 if (cond)
4531 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4532 emit_insn (pattern);
4533 }
4534
4535 /* As above, but extra parameter GENERATE which, if clear, suppresses
4536 RTL generation. */
4537
4538 static int
4539 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4540 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4541 int subtargets, int generate)
4542 {
4543 int can_invert = 0;
4544 int can_negate = 0;
4545 int final_invert = 0;
4546 int i;
4547 int set_sign_bit_copies = 0;
4548 int clear_sign_bit_copies = 0;
4549 int clear_zero_bit_copies = 0;
4550 int set_zero_bit_copies = 0;
4551 int insns = 0, neg_insns, inv_insns;
4552 unsigned HOST_WIDE_INT temp1, temp2;
4553 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4554 struct four_ints *immediates;
4555 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4556
4557 /* Find out which operations are safe for a given CODE. Also do a quick
4558 check for degenerate cases; these can occur when DImode operations
4559 are split. */
4560 switch (code)
4561 {
4562 case SET:
4563 can_invert = 1;
4564 break;
4565
4566 case PLUS:
4567 can_negate = 1;
4568 break;
4569
4570 case IOR:
4571 if (remainder == 0xffffffff)
4572 {
4573 if (generate)
4574 emit_constant_insn (cond,
4575 gen_rtx_SET (target,
4576 GEN_INT (ARM_SIGN_EXTEND (val))));
4577 return 1;
4578 }
4579
4580 if (remainder == 0)
4581 {
4582 if (reload_completed && rtx_equal_p (target, source))
4583 return 0;
4584
4585 if (generate)
4586 emit_constant_insn (cond, gen_rtx_SET (target, source));
4587 return 1;
4588 }
4589 break;
4590
4591 case AND:
4592 if (remainder == 0)
4593 {
4594 if (generate)
4595 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4596 return 1;
4597 }
4598 if (remainder == 0xffffffff)
4599 {
4600 if (reload_completed && rtx_equal_p (target, source))
4601 return 0;
4602 if (generate)
4603 emit_constant_insn (cond, gen_rtx_SET (target, source));
4604 return 1;
4605 }
4606 can_invert = 1;
4607 break;
4608
4609 case XOR:
4610 if (remainder == 0)
4611 {
4612 if (reload_completed && rtx_equal_p (target, source))
4613 return 0;
4614 if (generate)
4615 emit_constant_insn (cond, gen_rtx_SET (target, source));
4616 return 1;
4617 }
4618
4619 if (remainder == 0xffffffff)
4620 {
4621 if (generate)
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (target,
4624 gen_rtx_NOT (mode, source)));
4625 return 1;
4626 }
4627 final_invert = 1;
4628 break;
4629
4630 case MINUS:
4631 /* We treat MINUS as (val - source), since (source - val) is always
4632 passed as (source + (-val)). */
4633 if (remainder == 0)
4634 {
4635 if (generate)
4636 emit_constant_insn (cond,
4637 gen_rtx_SET (target,
4638 gen_rtx_NEG (mode, source)));
4639 return 1;
4640 }
4641 if (const_ok_for_arm (val))
4642 {
4643 if (generate)
4644 emit_constant_insn (cond,
4645 gen_rtx_SET (target,
4646 gen_rtx_MINUS (mode, GEN_INT (val),
4647 source)));
4648 return 1;
4649 }
4650
4651 break;
4652
4653 default:
4654 gcc_unreachable ();
4655 }
4656
4657 /* If we can do it in one insn get out quickly. */
4658 if (const_ok_for_op (val, code))
4659 {
4660 if (generate)
4661 emit_constant_insn (cond,
4662 gen_rtx_SET (target,
4663 (source
4664 ? gen_rtx_fmt_ee (code, mode, source,
4665 GEN_INT (val))
4666 : GEN_INT (val))));
4667 return 1;
4668 }
4669
4670 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4671 insn. */
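/* For instance, AND with 0x0000ffff becomes a single UXTH and AND with
   0x000fffff becomes UBFX rd, rn, #0, #20, assuming the corresponding
   patterns are available on the target.  */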
4672 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4673 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4674 {
4675 if (generate)
4676 {
4677 if (mode == SImode && i == 16)
4678 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4679 smaller insn. */
4680 emit_constant_insn (cond,
4681 gen_zero_extendhisi2
4682 (target, gen_lowpart (HImode, source)));
4683 else
4684 /* Extz only supports SImode, but we can coerce the operands
4685 into that mode. */
4686 emit_constant_insn (cond,
4687 gen_extzv_t2 (gen_lowpart (SImode, target),
4688 gen_lowpart (SImode, source),
4689 GEN_INT (i), const0_rtx));
4690 }
4691
4692 return 1;
4693 }
4694
4695 /* Calculate a few attributes that may be useful for specific
4696 optimizations. */
4697 /* Count number of leading zeros. */
4698 for (i = 31; i >= 0; i--)
4699 {
4700 if ((remainder & (1 << i)) == 0)
4701 clear_sign_bit_copies++;
4702 else
4703 break;
4704 }
4705
4706 /* Count number of leading 1's. */
4707 for (i = 31; i >= 0; i--)
4708 {
4709 if ((remainder & (1 << i)) != 0)
4710 set_sign_bit_copies++;
4711 else
4712 break;
4713 }
4714
4715 /* Count number of trailing zeros. */
4716 for (i = 0; i <= 31; i++)
4717 {
4718 if ((remainder & (1 << i)) == 0)
4719 clear_zero_bit_copies++;
4720 else
4721 break;
4722 }
4723
4724 /* Count number of trailing 1's. */
4725 for (i = 0; i <= 31; i++)
4726 {
4727 if ((remainder & (1 << i)) != 0)
4728 set_zero_bit_copies++;
4729 else
4730 break;
4731 }
4732
4733 switch (code)
4734 {
4735 case SET:
4736 /* See if we can do this by sign-extending a constant that is known
4737 to be negative. This is a good way of doing it, since the shift
4738 may well merge into a subsequent insn. */
4739 if (set_sign_bit_copies > 1)
4740 {
4741 if (const_ok_for_arm
4742 (temp1 = ARM_SIGN_EXTEND (remainder
4743 << (set_sign_bit_copies - 1))))
4744 {
4745 if (generate)
4746 {
4747 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4748 emit_constant_insn (cond,
4749 gen_rtx_SET (new_src, GEN_INT (temp1)));
4750 emit_constant_insn (cond,
4751 gen_ashrsi3 (target, new_src,
4752 GEN_INT (set_sign_bit_copies - 1)));
4753 }
4754 return 2;
4755 }
4756 /* For an inverted constant, we will need to set the low bits,
4757 these will be shifted out of harm's way. */
4758 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4759 if (const_ok_for_arm (~temp1))
4760 {
4761 if (generate)
4762 {
4763 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4764 emit_constant_insn (cond,
4765 gen_rtx_SET (new_src, GEN_INT (temp1)));
4766 emit_constant_insn (cond,
4767 gen_ashrsi3 (target, new_src,
4768 GEN_INT (set_sign_bit_copies - 1)));
4769 }
4770 return 2;
4771 }
4772 }
4773
4774 /* See if we can calculate the value as the difference between two
4775 valid immediates. */
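/* E.g. 0x00ffffff is not a valid immediate itself, but it can be built
   in two insns as 0x01000000 - 1, both of which are.  */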
4776 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4777 {
4778 int topshift = clear_sign_bit_copies & ~1;
4779
4780 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4781 & (0xff000000 >> topshift));
4782
4783 /* If temp1 is zero, then that means the 9 most significant
4784 bits of remainder were 1 and we've caused it to overflow.
4785 When topshift is 0 we don't need to do anything since we
4786 can borrow from 'bit 32'. */
4787 if (temp1 == 0 && topshift != 0)
4788 temp1 = 0x80000000 >> (topshift - 1);
4789
4790 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4791
4792 if (const_ok_for_arm (temp2))
4793 {
4794 if (generate)
4795 {
4796 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4797 emit_constant_insn (cond,
4798 gen_rtx_SET (new_src, GEN_INT (temp1)));
4799 emit_constant_insn (cond,
4800 gen_addsi3 (target, new_src,
4801 GEN_INT (-temp2)));
4802 }
4803
4804 return 2;
4805 }
4806 }
4807
4808 /* See if we can generate this by setting the bottom (or the top)
4809 16 bits, and then shifting these into the other half of the
4810 word. We only look for the simplest cases, to do more would cost
4811 too much. Be careful, however, not to generate this when the
4812 alternative would take fewer insns. */
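/* A purely illustrative case: 0x01230123 can be formed by synthesizing
   0x123 and then ORing it with a copy of itself shifted left by 16.  */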
4813 if (val & 0xffff0000)
4814 {
4815 temp1 = remainder & 0xffff0000;
4816 temp2 = remainder & 0x0000ffff;
4817
4818 /* Overlaps outside this range are best done using other methods. */
4819 for (i = 9; i < 24; i++)
4820 {
4821 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4822 && !const_ok_for_arm (temp2))
4823 {
4824 rtx new_src = (subtargets
4825 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4826 : target);
4827 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4828 source, subtargets, generate);
4829 source = new_src;
4830 if (generate)
4831 emit_constant_insn
4832 (cond,
4833 gen_rtx_SET
4834 (target,
4835 gen_rtx_IOR (mode,
4836 gen_rtx_ASHIFT (mode, source,
4837 GEN_INT (i)),
4838 source)));
4839 return insns + 1;
4840 }
4841 }
4842
4843 /* Don't duplicate cases already considered. */
4844 for (i = 17; i < 24; i++)
4845 {
4846 if (((temp1 | (temp1 >> i)) == remainder)
4847 && !const_ok_for_arm (temp1))
4848 {
4849 rtx new_src = (subtargets
4850 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4851 : target);
4852 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4853 source, subtargets, generate);
4854 source = new_src;
4855 if (generate)
4856 emit_constant_insn
4857 (cond,
4858 gen_rtx_SET (target,
4859 gen_rtx_IOR
4860 (mode,
4861 gen_rtx_LSHIFTRT (mode, source,
4862 GEN_INT (i)),
4863 source)));
4864 return insns + 1;
4865 }
4866 }
4867 }
4868 break;
4869
4870 case IOR:
4871 case XOR:
4872 /* If we have IOR or XOR, and the constant can be loaded in a
4873 single instruction, and we can find a temporary to put it in,
4874 then this can be done in two instructions instead of 3-4. */
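/* E.g. for x ^= 0xffffff00 the constant itself is not a valid
   immediate, but its inverse 0xff is, so it can be loaded with a
   single MVN into a temporary and combined with one EOR.  */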
4875 if (subtargets
4876 /* TARGET can't be NULL if SUBTARGETS is 0 */
4877 || (reload_completed && !reg_mentioned_p (target, source)))
4878 {
4879 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4880 {
4881 if (generate)
4882 {
4883 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4884
4885 emit_constant_insn (cond,
4886 gen_rtx_SET (sub, GEN_INT (val)));
4887 emit_constant_insn (cond,
4888 gen_rtx_SET (target,
4889 gen_rtx_fmt_ee (code, mode,
4890 source, sub)));
4891 }
4892 return 2;
4893 }
4894 }
4895
4896 if (code == XOR)
4897 break;
4898
4899 /* Convert:
4900 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4901 and the remaining 0s, e.g. 0xfff00000) to
4902 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4903
4904 This can be done in 2 instructions by using shifts with mov or mvn.
4905 E.g. for
4906 x = x | 0xfff00000;
4907 we generate:
4908 mvn r0, r0, asl #12
4909 mvn r0, r0, lsr #12 */
4910 if (set_sign_bit_copies > 8
4911 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4912 {
4913 if (generate)
4914 {
4915 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4916 rtx shift = GEN_INT (set_sign_bit_copies);
4917
4918 emit_constant_insn
4919 (cond,
4920 gen_rtx_SET (sub,
4921 gen_rtx_NOT (mode,
4922 gen_rtx_ASHIFT (mode,
4923 source,
4924 shift))));
4925 emit_constant_insn
4926 (cond,
4927 gen_rtx_SET (target,
4928 gen_rtx_NOT (mode,
4929 gen_rtx_LSHIFTRT (mode, sub,
4930 shift))));
4931 }
4932 return 2;
4933 }
4934
4935 /* Convert
4936 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4937 to
4938 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4939
4940 E.g. for r0 = r0 | 0xfff we generate:
4941 mvn r0, r0, lsr #12
4942 mvn r0, r0, asl #12
4943
4944 */
4945 if (set_zero_bit_copies > 8
4946 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4947 {
4948 if (generate)
4949 {
4950 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4951 rtx shift = GEN_INT (set_zero_bit_copies);
4952
4953 emit_constant_insn
4954 (cond,
4955 gen_rtx_SET (sub,
4956 gen_rtx_NOT (mode,
4957 gen_rtx_LSHIFTRT (mode,
4958 source,
4959 shift))));
4960 emit_constant_insn
4961 (cond,
4962 gen_rtx_SET (target,
4963 gen_rtx_NOT (mode,
4964 gen_rtx_ASHIFT (mode, sub,
4965 shift))));
4966 }
4967 return 2;
4968 }
4969
4970 /* This will never be reached for Thumb2 because orn is a valid
4971 instruction. This is for Thumb1 and the ARM 32 bit cases.
4972
4973 x = y | constant (such that ~constant is a valid constant)
4974 Transform this to
4975 x = ~(~y & ~constant).
4976 */
4977 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4978 {
4979 if (generate)
4980 {
4981 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4982 emit_constant_insn (cond,
4983 gen_rtx_SET (sub,
4984 gen_rtx_NOT (mode, source)));
4985 source = sub;
4986 if (subtargets)
4987 sub = gen_reg_rtx (mode);
4988 emit_constant_insn (cond,
4989 gen_rtx_SET (sub,
4990 gen_rtx_AND (mode, source,
4991 GEN_INT (temp1))));
4992 emit_constant_insn (cond,
4993 gen_rtx_SET (target,
4994 gen_rtx_NOT (mode, sub)));
4995 }
4996 return 3;
4997 }
4998 break;
4999
5000 case AND:
5001 /* See if two shifts will do 2 or more insns' worth of work. */
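/* For example, on a target where the UXTH path above is not available,
   an AND with 0x0000ffff can be done here as a shift left by 16
   followed by a logical shift right by 16, with no constant load.  */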
5002 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5003 {
5004 HOST_WIDE_INT shift_mask = ((0xffffffff
5005 << (32 - clear_sign_bit_copies))
5006 & 0xffffffff);
5007
5008 if ((remainder | shift_mask) != 0xffffffff)
5009 {
5010 HOST_WIDE_INT new_val
5011 = ARM_SIGN_EXTEND (remainder | shift_mask);
5012
5013 if (generate)
5014 {
5015 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5016 insns = arm_gen_constant (AND, SImode, cond, new_val,
5017 new_src, source, subtargets, 1);
5018 source = new_src;
5019 }
5020 else
5021 {
5022 rtx targ = subtargets ? NULL_RTX : target;
5023 insns = arm_gen_constant (AND, mode, cond, new_val,
5024 targ, source, subtargets, 0);
5025 }
5026 }
5027
5028 if (generate)
5029 {
5030 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5031 rtx shift = GEN_INT (clear_sign_bit_copies);
5032
5033 emit_insn (gen_ashlsi3 (new_src, source, shift));
5034 emit_insn (gen_lshrsi3 (target, new_src, shift));
5035 }
5036
5037 return insns + 2;
5038 }
5039
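/* Symmetric case: the low clear_zero_bit_copies bits of the constant
   are clear, so a logical right shift followed by a left shift clears
   them; any high bits that must also be cleared are again handled by a
   recursive arm_gen_constant call.  */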
5040 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5041 {
5042 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5043
5044 if ((remainder | shift_mask) != 0xffffffff)
5045 {
5046 HOST_WIDE_INT new_val
5047 = ARM_SIGN_EXTEND (remainder | shift_mask);
5048 if (generate)
5049 {
5050 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5051
5052 insns = arm_gen_constant (AND, mode, cond, new_val,
5053 new_src, source, subtargets, 1);
5054 source = new_src;
5055 }
5056 else
5057 {
5058 rtx targ = subtargets ? NULL_RTX : target;
5059
5060 insns = arm_gen_constant (AND, mode, cond, new_val,
5061 targ, source, subtargets, 0);
5062 }
5063 }
5064
5065 if (generate)
5066 {
5067 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5068 rtx shift = GEN_INT (clear_zero_bit_copies);
5069
5070 emit_insn (gen_lshrsi3 (new_src, source, shift));
5071 emit_insn (gen_ashlsi3 (target, new_src, shift));
5072 }
5073
5074 return insns + 2;
5075 }
5076
5077 break;
5078
5079 default:
5080 break;
5081 }
5082
5083 /* Calculate what the instruction sequences would be if we generated it
5084 normally, negated, or inverted. */
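/* For example, a constant that needs two or more positive immediates
   may have a negation or complement that fits a single 8-bit rotated
   immediate; whichever of the three candidate sequences is shortest
   (allowing for the trailing MVN when final_invert is set) is chosen
   below.  */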
5085 if (code == AND)
5086 /* AND cannot be split into multiple insns, so invert and use BIC. */
5087 insns = 99;
5088 else
5089 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5090
5091 if (can_negate)
5092 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5093 &neg_immediates);
5094 else
5095 neg_insns = 99;
5096
5097 if (can_invert || final_invert)
5098 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5099 &inv_immediates);
5100 else
5101 inv_insns = 99;
5102
5103 immediates = &pos_immediates;
5104
5105 /* Is the negated immediate sequence more efficient? */
5106 if (neg_insns < insns && neg_insns <= inv_insns)
5107 {
5108 insns = neg_insns;
5109 immediates = &neg_immediates;
5110 }
5111 else
5112 can_negate = 0;
5113
5114 /* Is the inverted immediate sequence more efficient?
5115 We must allow for an extra NOT instruction for XOR operations, although
5116 there is some chance that the final 'mvn' will get optimized later. */
5117 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5118 {
5119 insns = inv_insns;
5120 immediates = &inv_immediates;
5121 }
5122 else
5123 {
5124 can_invert = 0;
5125 final_invert = 0;
5126 }
5127
5128 /* Now output the chosen sequence as instructions. */
5129 if (generate)
5130 {
5131 for (i = 0; i < insns; i++)
5132 {
5133 rtx new_src, temp1_rtx;
5134
5135 temp1 = immediates->i[i];
5136
5137 if (code == SET || code == MINUS)
5138 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5139 else if ((final_invert || i < (insns - 1)) && subtargets)
5140 new_src = gen_reg_rtx (mode);
5141 else
5142 new_src = target;
5143
5144 if (can_invert)
5145 temp1 = ~temp1;
5146 else if (can_negate)
5147 temp1 = -temp1;
5148
5149 temp1 = trunc_int_for_mode (temp1, mode);
5150 temp1_rtx = GEN_INT (temp1);
5151
5152 if (code == SET)
5153 ;
5154 else if (code == MINUS)
5155 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5156 else
5157 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5158
5159 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5160 source = new_src;
5161
5162 if (code == SET)
5163 {
5164 can_negate = can_invert;
5165 can_invert = 0;
5166 code = PLUS;
5167 }
5168 else if (code == MINUS)
5169 code = PLUS;
5170 }
5171 }
5172
5173 if (final_invert)
5174 {
5175 if (generate)
5176 emit_constant_insn (cond, gen_rtx_SET (target,
5177 gen_rtx_NOT (mode, source)));
5178 insns++;
5179 }
5180
5181 return insns;
5182 }
5183
5184 /* Canonicalize a comparison so that we are more likely to recognize it.
5185 This can be done for a few constant compares, where we can make the
5186 immediate value easier to load. */
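/* For example, 0xfff is not a valid ARM immediate but 0x1000 is, so a
   comparison such as (x > 0xfff) is rewritten below as (x >= 0x1000).  */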
5187
5188 static void
5189 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5190 bool op0_preserve_value)
5191 {
5192 machine_mode mode;
5193 unsigned HOST_WIDE_INT i, maxval;
5194
5195 mode = GET_MODE (*op0);
5196 if (mode == VOIDmode)
5197 mode = GET_MODE (*op1);
5198
5199 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5200
5201 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5202 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5203 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5204 for GTU/LEU in Thumb mode. */
5205 if (mode == DImode)
5206 {
5207
5208 if (*code == GT || *code == LE
5209 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5210 {
5211 /* Missing comparison. First try to use an available
5212 comparison. */
5213 if (CONST_INT_P (*op1))
5214 {
5215 i = INTVAL (*op1);
5216 switch (*code)
5217 {
5218 case GT:
5219 case LE:
5220 if (i != maxval
5221 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5222 {
5223 *op1 = GEN_INT (i + 1);
5224 *code = *code == GT ? GE : LT;
5225 return;
5226 }
5227 break;
5228 case GTU:
5229 case LEU:
5230 if (i != ~((unsigned HOST_WIDE_INT) 0)
5231 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5232 {
5233 *op1 = GEN_INT (i + 1);
5234 *code = *code == GTU ? GEU : LTU;
5235 return;
5236 }
5237 break;
5238 default:
5239 gcc_unreachable ();
5240 }
5241 }
5242
5243 /* If that did not work, reverse the condition. */
5244 if (!op0_preserve_value)
5245 {
5246 std::swap (*op0, *op1);
5247 *code = (int)swap_condition ((enum rtx_code)*code);
5248 }
5249 }
5250 return;
5251 }
5252
5253 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5254 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5255 to facilitate possible combining with a cmp into 'ands'. */
5256 if (mode == SImode
5257 && GET_CODE (*op0) == ZERO_EXTEND
5258 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5259 && GET_MODE (XEXP (*op0, 0)) == QImode
5260 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5261 && subreg_lowpart_p (XEXP (*op0, 0))
5262 && *op1 == const0_rtx)
5263 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5264 GEN_INT (255));
5265
5266 /* Comparisons smaller than DImode. Only adjust comparisons against
5267 an out-of-range constant. */
5268 if (!CONST_INT_P (*op1)
5269 || const_ok_for_arm (INTVAL (*op1))
5270 || const_ok_for_arm (- INTVAL (*op1)))
5271 return;
5272
5273 i = INTVAL (*op1);
5274
5275 switch (*code)
5276 {
5277 case EQ:
5278 case NE:
5279 return;
5280
5281 case GT:
5282 case LE:
5283 if (i != maxval
5284 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5285 {
5286 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5287 *code = *code == GT ? GE : LT;
5288 return;
5289 }
5290 break;
5291
5292 case GE:
5293 case LT:
5294 if (i != ~maxval
5295 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5296 {
5297 *op1 = GEN_INT (i - 1);
5298 *code = *code == GE ? GT : LE;
5299 return;
5300 }
5301 break;
5302
5303 case GTU:
5304 case LEU:
5305 if (i != ~((unsigned HOST_WIDE_INT) 0)
5306 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5307 {
5308 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5309 *code = *code == GTU ? GEU : LTU;
5310 return;
5311 }
5312 break;
5313
5314 case GEU:
5315 case LTU:
5316 if (i != 0
5317 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5318 {
5319 *op1 = GEN_INT (i - 1);
5320 *code = *code == GEU ? GTU : LEU;
5321 return;
5322 }
5323 break;
5324
5325 default:
5326 gcc_unreachable ();
5327 }
5328 }
5329
5330
5331 /* Define how to find the value returned by a function. */
5332
5333 static rtx
5334 arm_function_value(const_tree type, const_tree func,
5335 bool outgoing ATTRIBUTE_UNUSED)
5336 {
5337 machine_mode mode;
5338 int unsignedp ATTRIBUTE_UNUSED;
5339 rtx r ATTRIBUTE_UNUSED;
5340
5341 mode = TYPE_MODE (type);
5342
5343 if (TARGET_AAPCS_BASED)
5344 return aapcs_allocate_return_reg (mode, type, func);
5345
5346 /* Promote integer types. */
5347 if (INTEGRAL_TYPE_P (type))
5348 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5349
5350 /* Promotes small structs returned in a register to full-word size
5351 for big-endian AAPCS. */
5352 if (arm_return_in_msb (type))
5353 {
5354 HOST_WIDE_INT size = int_size_in_bytes (type);
5355 if (size % UNITS_PER_WORD != 0)
5356 {
5357 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5358 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5359 }
5360 }
5361
5362 return arm_libcall_value_1 (mode);
5363 }
5364
5365 /* libcall hashtable helpers. */
5366
5367 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5368 {
5369 static inline hashval_t hash (const rtx_def *);
5370 static inline bool equal (const rtx_def *, const rtx_def *);
5371 static inline void remove (rtx_def *);
5372 };
5373
5374 inline bool
5375 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5376 {
5377 return rtx_equal_p (p1, p2);
5378 }
5379
5380 inline hashval_t
5381 libcall_hasher::hash (const rtx_def *p1)
5382 {
5383 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5384 }
5385
5386 typedef hash_table<libcall_hasher> libcall_table_type;
5387
5388 static void
5389 add_libcall (libcall_table_type *htab, rtx libcall)
5390 {
5391 *htab->find_slot (libcall, INSERT) = libcall;
5392 }
5393
5394 static bool
5395 arm_libcall_uses_aapcs_base (const_rtx libcall)
5396 {
5397 static bool init_done = false;
5398 static libcall_table_type *libcall_htab = NULL;
5399
5400 if (!init_done)
5401 {
5402 init_done = true;
5403
5404 libcall_htab = new libcall_table_type (31);
5405 add_libcall (libcall_htab,
5406 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5407 add_libcall (libcall_htab,
5408 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5409 add_libcall (libcall_htab,
5410 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5411 add_libcall (libcall_htab,
5412 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5413
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5422
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5431 add_libcall (libcall_htab,
5432 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5433 add_libcall (libcall_htab,
5434 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5435 add_libcall (libcall_htab,
5436 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5437 add_libcall (libcall_htab,
5438 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5439
5440 /* Values from double-precision helper functions are returned in core
5441 registers if the selected core only supports single-precision
5442 arithmetic, even if we are using the hard-float ABI. The same is
5443 true for single-precision helpers, but we will never be using the
5444 hard-float ABI on a CPU which doesn't support single-precision
5445 operations in hardware. */
5446 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5452 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5453 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5454 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5455 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5456 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5457 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5458 SFmode));
5459 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5460 DFmode));
5461 add_libcall (libcall_htab,
5462 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5463 }
5464
5465 return libcall && libcall_htab->find (libcall) != NULL;
5466 }
5467
5468 static rtx
5469 arm_libcall_value_1 (machine_mode mode)
5470 {
5471 if (TARGET_AAPCS_BASED)
5472 return aapcs_libcall_value (mode);
5473 else if (TARGET_IWMMXT_ABI
5474 && arm_vector_mode_supported_p (mode))
5475 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5476 else
5477 return gen_rtx_REG (mode, ARG_REGISTER (1));
5478 }
5479
5480 /* Define how to find the value returned by a library function
5481 assuming the value has mode MODE. */
5482
5483 static rtx
5484 arm_libcall_value (machine_mode mode, const_rtx libcall)
5485 {
5486 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5487 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5488 {
5489 /* The following libcalls return their result in integer registers,
5490 even though they return a floating point value. */
5491 if (arm_libcall_uses_aapcs_base (libcall))
5492 return gen_rtx_REG (mode, ARG_REGISTER(1));
5493
5494 }
5495
5496 return arm_libcall_value_1 (mode);
5497 }
5498
5499 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5500
5501 static bool
5502 arm_function_value_regno_p (const unsigned int regno)
5503 {
5504 if (regno == ARG_REGISTER (1)
5505 || (TARGET_32BIT
5506 && TARGET_AAPCS_BASED
5507 && TARGET_HARD_FLOAT
5508 && regno == FIRST_VFP_REGNUM)
5509 || (TARGET_IWMMXT_ABI
5510 && regno == FIRST_IWMMXT_REGNUM))
5511 return true;
5512
5513 return false;
5514 }
5515
5516 /* Determine the amount of memory needed to store the possible return
5517 registers of an untyped call. */
5518 int
5519 arm_apply_result_size (void)
5520 {
5521 int size = 16;
5522
5523 if (TARGET_32BIT)
5524 {
5525 if (TARGET_HARD_FLOAT_ABI)
5526 size += 32;
5527 if (TARGET_IWMMXT_ABI)
5528 size += 8;
5529 }
5530
5531 return size;
5532 }
5533
5534 /* Decide whether TYPE should be returned in memory (true)
5535 or in a register (false). FNTYPE is the type of the function making
5536 the call. */
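/* For example, under AAPCS a one-word 'struct { int a; }' is returned
   in r0 (this returns false), whereas a two-word 'struct { int a, b; }'
   that is not a co-processor candidate is returned in memory (this
   returns true).  */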
5537 static bool
5538 arm_return_in_memory (const_tree type, const_tree fntype)
5539 {
5540 HOST_WIDE_INT size;
5541
5542 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5543
5544 if (TARGET_AAPCS_BASED)
5545 {
5546 /* Simple, non-aggregate types (i.e. not including vectors and
5547 complex) are always returned in a register (or registers).
5548 We don't care about which register here, so we can short-cut
5549 some of the detail. */
5550 if (!AGGREGATE_TYPE_P (type)
5551 && TREE_CODE (type) != VECTOR_TYPE
5552 && TREE_CODE (type) != COMPLEX_TYPE)
5553 return false;
5554
5555 /* Any return value that is no larger than one word can be
5556 returned in r0. */
5557 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5558 return false;
5559
5560 /* Check any available co-processors to see if they accept the
5561 type as a register candidate (VFP, for example, can return
5562 some aggregates in consecutive registers). These aren't
5563 available if the call is variadic. */
5564 if (aapcs_select_return_coproc (type, fntype) >= 0)
5565 return false;
5566
5567 /* Vector values should be returned using ARM registers, not
5568 memory (unless they're over 16 bytes, which will break since
5569 we only have four call-clobbered registers to play with). */
5570 if (TREE_CODE (type) == VECTOR_TYPE)
5571 return (size < 0 || size > (4 * UNITS_PER_WORD));
5572
5573 /* The rest go in memory. */
5574 return true;
5575 }
5576
5577 if (TREE_CODE (type) == VECTOR_TYPE)
5578 return (size < 0 || size > (4 * UNITS_PER_WORD));
5579
5580 if (!AGGREGATE_TYPE_P (type) &&
5581 (TREE_CODE (type) != VECTOR_TYPE))
5582 /* All simple types are returned in registers. */
5583 return false;
5584
5585 if (arm_abi != ARM_ABI_APCS)
5586 {
5587 /* ATPCS and later return aggregate types in memory only if they are
5588 larger than a word (or are variable size). */
5589 return (size < 0 || size > UNITS_PER_WORD);
5590 }
5591
5592 /* For the arm-wince targets we choose to be compatible with Microsoft's
5593 ARM and Thumb compilers, which always return aggregates in memory. */
5594 #ifndef ARM_WINCE
5595 /* All structures/unions bigger than one word are returned in memory.
5596 Also catch the case where int_size_in_bytes returns -1. In this case
5597 the aggregate is either huge or of variable size, and in either case
5598 we will want to return it via memory and not in a register. */
5599 if (size < 0 || size > UNITS_PER_WORD)
5600 return true;
5601
5602 if (TREE_CODE (type) == RECORD_TYPE)
5603 {
5604 tree field;
5605
5606 /* For a struct the APCS says that we only return in a register
5607 if the type is 'integer like' and every addressable element
5608 has an offset of zero. For practical purposes this means
5609 that the structure can have at most one non bit-field element
5610 and that this element must be the first one in the structure. */
5611
5612 /* Find the first field, ignoring non FIELD_DECL things which will
5613 have been created by C++. */
5614 for (field = TYPE_FIELDS (type);
5615 field && TREE_CODE (field) != FIELD_DECL;
5616 field = DECL_CHAIN (field))
5617 continue;
5618
5619 if (field == NULL)
5620 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5621
5622 /* Check that the first field is valid for returning in a register. */
5623
5624 /* ... Floats are not allowed */
5625 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5626 return true;
5627
5628 /* ... Aggregates that are not themselves valid for returning in
5629 a register are not allowed. */
5630 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5631 return true;
5632
5633 /* Now check the remaining fields, if any. Only bitfields are allowed,
5634 since they are not addressable. */
5635 for (field = DECL_CHAIN (field);
5636 field;
5637 field = DECL_CHAIN (field))
5638 {
5639 if (TREE_CODE (field) != FIELD_DECL)
5640 continue;
5641
5642 if (!DECL_BIT_FIELD_TYPE (field))
5643 return true;
5644 }
5645
5646 return false;
5647 }
5648
5649 if (TREE_CODE (type) == UNION_TYPE)
5650 {
5651 tree field;
5652
5653 /* Unions can be returned in registers if every element is
5654 integral, or can be returned in an integer register. */
5655 for (field = TYPE_FIELDS (type);
5656 field;
5657 field = DECL_CHAIN (field))
5658 {
5659 if (TREE_CODE (field) != FIELD_DECL)
5660 continue;
5661
5662 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5663 return true;
5664
5665 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5666 return true;
5667 }
5668
5669 return false;
5670 }
5671 #endif /* not ARM_WINCE */
5672
5673 /* Return all other types in memory. */
5674 return true;
5675 }
5676
5677 const struct pcs_attribute_arg
5678 {
5679 const char *arg;
5680 enum arm_pcs value;
5681 } pcs_attribute_args[] =
5682 {
5683 {"aapcs", ARM_PCS_AAPCS},
5684 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5685 #if 0
5686 /* We could recognize these, but changes would be needed elsewhere
5687 * to implement them. */
5688 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5689 {"atpcs", ARM_PCS_ATPCS},
5690 {"apcs", ARM_PCS_APCS},
5691 #endif
5692 {NULL, ARM_PCS_UNKNOWN}
5693 };
5694
5695 static enum arm_pcs
5696 arm_pcs_from_attribute (tree attr)
5697 {
5698 const struct pcs_attribute_arg *ptr;
5699 const char *arg;
5700
5701 /* Get the value of the argument. */
5702 if (TREE_VALUE (attr) == NULL_TREE
5703 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5704 return ARM_PCS_UNKNOWN;
5705
5706 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5707
5708 /* Check it against the list of known arguments. */
5709 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5710 if (streq (arg, ptr->arg))
5711 return ptr->value;
5712
5713 /* An unrecognized PCS variant. */
5714 return ARM_PCS_UNKNOWN;
5715 }
5716
5717 /* Get the PCS variant to use for this call. TYPE is the function's type
5718 specification, DECL is the specific declaration. DECL may be null if
5719 the call could be indirect or if this is a library call. */
5720 static enum arm_pcs
5721 arm_get_pcs_model (const_tree type, const_tree decl)
5722 {
5723 bool user_convention = false;
5724 enum arm_pcs user_pcs = arm_pcs_default;
5725 tree attr;
5726
5727 gcc_assert (type);
5728
5729 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5730 if (attr)
5731 {
5732 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5733 user_convention = true;
5734 }
5735
5736 if (TARGET_AAPCS_BASED)
5737 {
5738 /* Detect varargs functions. These always use the base rules
5739 (no argument is ever a candidate for a co-processor
5740 register). */
5741 bool base_rules = stdarg_p (type);
5742
5743 if (user_convention)
5744 {
5745 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5746 sorry ("non-AAPCS derived PCS variant");
5747 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5748 error ("variadic functions must use the base AAPCS variant");
5749 }
5750
5751 if (base_rules)
5752 return ARM_PCS_AAPCS;
5753 else if (user_convention)
5754 return user_pcs;
5755 else if (decl && flag_unit_at_a_time)
5756 {
5757 /* Local functions never leak outside this compilation unit,
5758 so we are free to use whatever conventions are
5759 appropriate. */
5760 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5761 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5762 if (i && i->local)
5763 return ARM_PCS_AAPCS_LOCAL;
5764 }
5765 }
5766 else if (user_convention && user_pcs != arm_pcs_default)
5767 sorry ("PCS variant");
5768
5769 /* For everything else we use the target's default. */
5770 return arm_pcs_default;
5771 }
5772
5773
5774 static void
5775 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5776 const_tree fntype ATTRIBUTE_UNUSED,
5777 rtx libcall ATTRIBUTE_UNUSED,
5778 const_tree fndecl ATTRIBUTE_UNUSED)
5779 {
5780 /* Record the unallocated VFP registers. */
5781 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5782 pcum->aapcs_vfp_reg_alloc = 0;
5783 }
5784
5785 /* Walk down the type tree of TYPE counting consecutive base elements.
5786 If *MODEP is VOIDmode, then set it to the first valid floating point
5787 type. If a non-floating point type is found, or if a floating point
5788 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5789 otherwise return the count in the sub-tree. */
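/* For example, 'struct { float x, y, z; }' returns 3 with *MODEP set to
   SFmode, whereas a struct containing both a float and a double returns
   -1 because the base types differ.  */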
5790 static int
5791 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5792 {
5793 machine_mode mode;
5794 HOST_WIDE_INT size;
5795
5796 switch (TREE_CODE (type))
5797 {
5798 case REAL_TYPE:
5799 mode = TYPE_MODE (type);
5800 if (mode != DFmode && mode != SFmode && mode != HFmode)
5801 return -1;
5802
5803 if (*modep == VOIDmode)
5804 *modep = mode;
5805
5806 if (*modep == mode)
5807 return 1;
5808
5809 break;
5810
5811 case COMPLEX_TYPE:
5812 mode = TYPE_MODE (TREE_TYPE (type));
5813 if (mode != DFmode && mode != SFmode)
5814 return -1;
5815
5816 if (*modep == VOIDmode)
5817 *modep = mode;
5818
5819 if (*modep == mode)
5820 return 2;
5821
5822 break;
5823
5824 case VECTOR_TYPE:
5825 /* Use V2SImode and V4SImode as representatives of all 64-bit
5826 and 128-bit vector types, whether or not those modes are
5827 supported with the present options. */
5828 size = int_size_in_bytes (type);
5829 switch (size)
5830 {
5831 case 8:
5832 mode = V2SImode;
5833 break;
5834 case 16:
5835 mode = V4SImode;
5836 break;
5837 default:
5838 return -1;
5839 }
5840
5841 if (*modep == VOIDmode)
5842 *modep = mode;
5843
5844 /* Vector modes are considered to be opaque: two vectors are
5845 equivalent for the purposes of being homogeneous aggregates
5846 if they are the same size. */
5847 if (*modep == mode)
5848 return 1;
5849
5850 break;
5851
5852 case ARRAY_TYPE:
5853 {
5854 int count;
5855 tree index = TYPE_DOMAIN (type);
5856
5857 /* Can't handle incomplete types nor sizes that are not
5858 fixed. */
5859 if (!COMPLETE_TYPE_P (type)
5860 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5861 return -1;
5862
5863 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5864 if (count == -1
5865 || !index
5866 || !TYPE_MAX_VALUE (index)
5867 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5868 || !TYPE_MIN_VALUE (index)
5869 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5870 || count < 0)
5871 return -1;
5872
5873 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5874 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5875
5876 /* There must be no padding. */
5877 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5878 return -1;
5879
5880 return count;
5881 }
5882
5883 case RECORD_TYPE:
5884 {
5885 int count = 0;
5886 int sub_count;
5887 tree field;
5888
5889 /* Can't handle incomplete types nor sizes that are not
5890 fixed. */
5891 if (!COMPLETE_TYPE_P (type)
5892 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5893 return -1;
5894
5895 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5896 {
5897 if (TREE_CODE (field) != FIELD_DECL)
5898 continue;
5899
5900 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5901 if (sub_count < 0)
5902 return -1;
5903 count += sub_count;
5904 }
5905
5906 /* There must be no padding. */
5907 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5908 return -1;
5909
5910 return count;
5911 }
5912
5913 case UNION_TYPE:
5914 case QUAL_UNION_TYPE:
5915 {
5916 /* These aren't very interesting except in a degenerate case. */
5917 int count = 0;
5918 int sub_count;
5919 tree field;
5920
5921 /* Can't handle incomplete types nor sizes that are not
5922 fixed. */
5923 if (!COMPLETE_TYPE_P (type)
5924 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5925 return -1;
5926
5927 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5928 {
5929 if (TREE_CODE (field) != FIELD_DECL)
5930 continue;
5931
5932 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5933 if (sub_count < 0)
5934 return -1;
5935 count = count > sub_count ? count : sub_count;
5936 }
5937
5938 /* There must be no padding. */
5939 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5940 return -1;
5941
5942 return count;
5943 }
5944
5945 default:
5946 break;
5947 }
5948
5949 return -1;
5950 }
5951
5952 /* Return true if PCS_VARIANT should use VFP registers. */
5953 static bool
5954 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5955 {
5956 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5957 {
5958 static bool seen_thumb1_vfp = false;
5959
5960 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5961 {
5962 sorry ("Thumb-1 hard-float VFP ABI");
5963 /* sorry() is not immediately fatal, so only display this once. */
5964 seen_thumb1_vfp = true;
5965 }
5966
5967 return true;
5968 }
5969
5970 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5971 return false;
5972
5973 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5974 (TARGET_VFP_DOUBLE || !is_double));
5975 }
5976
5977 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5978 suitable for passing or returning in VFP registers for the PCS
5979 variant selected. If it is, then *BASE_MODE is updated to contain
5980 a machine mode describing each element of the argument's type and
5981 *COUNT to hold the number of such elements. */
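/* For example, a '_Complex double' argument sets *BASE_MODE to DFmode
   and *COUNT to 2, while a homogeneous aggregate of three floats sets
   *BASE_MODE to SFmode and *COUNT to 3.  */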
5982 static bool
5983 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5984 machine_mode mode, const_tree type,
5985 machine_mode *base_mode, int *count)
5986 {
5987 machine_mode new_mode = VOIDmode;
5988
5989 /* If we have the type information, prefer that to working things
5990 out from the mode. */
5991 if (type)
5992 {
5993 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5994
5995 if (ag_count > 0 && ag_count <= 4)
5996 *count = ag_count;
5997 else
5998 return false;
5999 }
6000 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6001 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6002 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6003 {
6004 *count = 1;
6005 new_mode = mode;
6006 }
6007 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6008 {
6009 *count = 2;
6010 new_mode = (mode == DCmode ? DFmode : SFmode);
6011 }
6012 else
6013 return false;
6014
6015
6016 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6017 return false;
6018
6019 *base_mode = new_mode;
6020 return true;
6021 }
6022
6023 static bool
6024 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6025 machine_mode mode, const_tree type)
6026 {
6027 int count ATTRIBUTE_UNUSED;
6028 machine_mode ag_mode ATTRIBUTE_UNUSED;
6029
6030 if (!use_vfp_abi (pcs_variant, false))
6031 return false;
6032 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6033 &ag_mode, &count);
6034 }
6035
6036 static bool
6037 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6038 const_tree type)
6039 {
6040 if (!use_vfp_abi (pcum->pcs_variant, false))
6041 return false;
6042
6043 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6044 &pcum->aapcs_vfp_rmode,
6045 &pcum->aapcs_vfp_rcount);
6046 }
6047
6048 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6049 for the behaviour of this function. */
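/* For example, for a homogeneous aggregate of two doubles we have
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 2, so shift == 2
   and mask == 0xf; the loop below then searches for four consecutive
   free single-precision registers starting at an even S register,
   e.g. s0-s3 (d0-d1).  */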
6050
6051 static bool
6052 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6053 const_tree type ATTRIBUTE_UNUSED)
6054 {
6055 int rmode_size
6056 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6057 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6058 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6059 int regno;
6060
6061 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6062 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6063 {
6064 pcum->aapcs_vfp_reg_alloc = mask << regno;
6065 if (mode == BLKmode
6066 || (mode == TImode && ! TARGET_NEON)
6067 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6068 {
6069 int i;
6070 int rcount = pcum->aapcs_vfp_rcount;
6071 int rshift = shift;
6072 machine_mode rmode = pcum->aapcs_vfp_rmode;
6073 rtx par;
6074 if (!TARGET_NEON)
6075 {
6076 /* Avoid using unsupported vector modes. */
6077 if (rmode == V2SImode)
6078 rmode = DImode;
6079 else if (rmode == V4SImode)
6080 {
6081 rmode = DImode;
6082 rcount *= 2;
6083 rshift /= 2;
6084 }
6085 }
6086 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6087 for (i = 0; i < rcount; i++)
6088 {
6089 rtx tmp = gen_rtx_REG (rmode,
6090 FIRST_VFP_REGNUM + regno + i * rshift);
6091 tmp = gen_rtx_EXPR_LIST
6092 (VOIDmode, tmp,
6093 GEN_INT (i * GET_MODE_SIZE (rmode)));
6094 XVECEXP (par, 0, i) = tmp;
6095 }
6096
6097 pcum->aapcs_reg = par;
6098 }
6099 else
6100 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6101 return true;
6102 }
6103 return false;
6104 }
6105
6106 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6107 comment there for the behaviour of this function. */
6108
6109 static rtx
6110 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6111 machine_mode mode,
6112 const_tree type ATTRIBUTE_UNUSED)
6113 {
6114 if (!use_vfp_abi (pcs_variant, false))
6115 return NULL;
6116
6117 if (mode == BLKmode
6118 || (GET_MODE_CLASS (mode) == MODE_INT
6119 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6120 && !TARGET_NEON))
6121 {
6122 int count;
6123 machine_mode ag_mode;
6124 int i;
6125 rtx par;
6126 int shift;
6127
6128 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6129 &ag_mode, &count);
6130
6131 if (!TARGET_NEON)
6132 {
6133 if (ag_mode == V2SImode)
6134 ag_mode = DImode;
6135 else if (ag_mode == V4SImode)
6136 {
6137 ag_mode = DImode;
6138 count *= 2;
6139 }
6140 }
6141 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6142 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6143 for (i = 0; i < count; i++)
6144 {
6145 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6146 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6147 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6148 XVECEXP (par, 0, i) = tmp;
6149 }
6150
6151 return par;
6152 }
6153
6154 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6155 }
6156
6157 static void
6158 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6159 machine_mode mode ATTRIBUTE_UNUSED,
6160 const_tree type ATTRIBUTE_UNUSED)
6161 {
6162 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6163 pcum->aapcs_vfp_reg_alloc = 0;
6164 return;
6165 }
6166
6167 #define AAPCS_CP(X) \
6168 { \
6169 aapcs_ ## X ## _cum_init, \
6170 aapcs_ ## X ## _is_call_candidate, \
6171 aapcs_ ## X ## _allocate, \
6172 aapcs_ ## X ## _is_return_candidate, \
6173 aapcs_ ## X ## _allocate_return_reg, \
6174 aapcs_ ## X ## _advance \
6175 }
6176
6177 /* Table of co-processors that can be used to pass arguments in
6178 registers. Ideally no argument should be a candidate for more than
6179 one co-processor table entry, but the table is processed in order
6180 and stops after the first match. If that entry then fails to put
6181 the argument into a co-processor register, the argument will go on
6182 the stack. */
6183 static struct
6184 {
6185 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6186 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6187
6188 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6189 BLKmode) is a candidate for this co-processor's registers; this
6190 function should ignore any position-dependent state in
6191 CUMULATIVE_ARGS and only use call-type dependent information. */
6192 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6193
6194 /* Return true if the argument does get a co-processor register; it
6195 should set aapcs_reg to an RTX of the register allocated as is
6196 required for a return from FUNCTION_ARG. */
6197 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6198
6199 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6200 be returned in this co-processor's registers. */
6201 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6202
6203 /* Allocate and return an RTX element to hold the return type of a call. This
6204 routine must not fail and will only be called if is_return_candidate
6205 returned true with the same parameters. */
6206 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6207
6208 /* Finish processing this argument and prepare to start processing
6209 the next one. */
6210 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6211 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6212 {
6213 AAPCS_CP(vfp)
6214 };
6215
6216 #undef AAPCS_CP
6217
6218 static int
6219 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6220 const_tree type)
6221 {
6222 int i;
6223
6224 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6225 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6226 return i;
6227
6228 return -1;
6229 }
6230
6231 static int
6232 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6233 {
6234 /* We aren't passed a decl, so we can't check that a call is local.
6235 However, it isn't clear that that would be a win anyway, since it
6236 might limit some tail-calling opportunities. */
6237 enum arm_pcs pcs_variant;
6238
6239 if (fntype)
6240 {
6241 const_tree fndecl = NULL_TREE;
6242
6243 if (TREE_CODE (fntype) == FUNCTION_DECL)
6244 {
6245 fndecl = fntype;
6246 fntype = TREE_TYPE (fntype);
6247 }
6248
6249 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6250 }
6251 else
6252 pcs_variant = arm_pcs_default;
6253
6254 if (pcs_variant != ARM_PCS_AAPCS)
6255 {
6256 int i;
6257
6258 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6259 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6260 TYPE_MODE (type),
6261 type))
6262 return i;
6263 }
6264 return -1;
6265 }
6266
6267 static rtx
6268 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6269 const_tree fntype)
6270 {
6271 /* We aren't passed a decl, so we can't check that a call is local.
6272 However, it isn't clear that that would be a win anyway, since it
6273 might limit some tail-calling opportunities. */
6274 enum arm_pcs pcs_variant;
6275 int unsignedp ATTRIBUTE_UNUSED;
6276
6277 if (fntype)
6278 {
6279 const_tree fndecl = NULL_TREE;
6280
6281 if (TREE_CODE (fntype) == FUNCTION_DECL)
6282 {
6283 fndecl = fntype;
6284 fntype = TREE_TYPE (fntype);
6285 }
6286
6287 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6288 }
6289 else
6290 pcs_variant = arm_pcs_default;
6291
6292 /* Promote integer types. */
6293 if (type && INTEGRAL_TYPE_P (type))
6294 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6295
6296 if (pcs_variant != ARM_PCS_AAPCS)
6297 {
6298 int i;
6299
6300 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6301 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6302 type))
6303 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6304 mode, type);
6305 }
6306
6307 /* Promotes small structs returned in a register to full-word size
6308 for big-endian AAPCS. */
6309 if (type && arm_return_in_msb (type))
6310 {
6311 HOST_WIDE_INT size = int_size_in_bytes (type);
6312 if (size % UNITS_PER_WORD != 0)
6313 {
6314 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6315 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6316 }
6317 }
6318
6319 return gen_rtx_REG (mode, R0_REGNUM);
6320 }
6321
6322 static rtx
6323 aapcs_libcall_value (machine_mode mode)
6324 {
6325 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6326 && GET_MODE_SIZE (mode) <= 4)
6327 mode = SImode;
6328
6329 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6330 }
6331
6332 /* Lay out a function argument using the AAPCS rules. The rule
6333 numbers referred to here are those in the AAPCS. */
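/* For example, for a call f(int, double) using the base AAPCS (no
   co-processor candidates, e.g. soft-float or a variadic callee), the
   int occupies r0; for the double, rule C3 rounds the NCRN up from 1 to
   2 and rule C4 then allocates r2-r3.  */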
6334 static void
6335 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6336 const_tree type, bool named)
6337 {
6338 int nregs, nregs2;
6339 int ncrn;
6340
6341 /* We only need to do this once per argument. */
6342 if (pcum->aapcs_arg_processed)
6343 return;
6344
6345 pcum->aapcs_arg_processed = true;
6346
6347 /* Special case: if named is false then we are handling an incoming
6348 anonymous argument which is on the stack. */
6349 if (!named)
6350 return;
6351
6352 /* Is this a potential co-processor register candidate? */
6353 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6354 {
6355 int slot = aapcs_select_call_coproc (pcum, mode, type);
6356 pcum->aapcs_cprc_slot = slot;
6357
6358 /* We don't have to apply any of the rules from part B of the
6359 preparation phase, these are handled elsewhere in the
6360 compiler. */
6361
6362 if (slot >= 0)
6363 {
6364 /* A Co-processor register candidate goes either in its own
6365 class of registers or on the stack. */
6366 if (!pcum->aapcs_cprc_failed[slot])
6367 {
6368 /* C1.cp - Try to allocate the argument to co-processor
6369 registers. */
6370 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6371 return;
6372
6373 /* C2.cp - Put the argument on the stack and note that we
6374 can't assign any more candidates in this slot. We also
6375 need to note that we have allocated stack space, so that
6376 we won't later try to split a non-cprc candidate between
6377 core registers and the stack. */
6378 pcum->aapcs_cprc_failed[slot] = true;
6379 pcum->can_split = false;
6380 }
6381
6382 /* We didn't get a register, so this argument goes on the
6383 stack. */
6384 gcc_assert (pcum->can_split == false);
6385 return;
6386 }
6387 }
6388
6389 /* C3 - For double-word aligned arguments, round the NCRN up to the
6390 next even number. */
6391 ncrn = pcum->aapcs_ncrn;
6392 if (ncrn & 1)
6393 {
6394 int res = arm_needs_doubleword_align (mode, type);
6395 /* Only warn during RTL expansion of call stmts, otherwise we would
6396 warn e.g. during gimplification even on functions that will be
6397 always inlined, and we'd warn multiple times. Don't warn when
6398 called in expand_function_start either, as we warn instead in
6399 arm_function_arg_boundary in that case. */
6400 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6401 inform (input_location, "parameter passing for argument of type "
6402 "%qT changed in GCC 7.1", type);
6403 else if (res > 0)
6404 ncrn++;
6405 }
6406
6407 nregs = ARM_NUM_REGS2(mode, type);
6408
6409 /* Sigh, this test should really assert that nregs > 0, but a GCC
6410 extension allows empty structs and then gives them empty size; it
6411 then allows such a structure to be passed by value. For some of
6412 the code below we have to pretend that such an argument has
6413 non-zero size so that we 'locate' it correctly either in
6414 registers or on the stack. */
6415 gcc_assert (nregs >= 0);
6416
6417 nregs2 = nregs ? nregs : 1;
6418
6419 /* C4 - Argument fits entirely in core registers. */
6420 if (ncrn + nregs2 <= NUM_ARG_REGS)
6421 {
6422 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6423 pcum->aapcs_next_ncrn = ncrn + nregs;
6424 return;
6425 }
6426
6427 /* C5 - Some core registers left and there are no arguments already
6428 on the stack: split this argument between the remaining core
6429 registers and the stack. */
6430 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6431 {
6432 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6433 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6434 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6435 return;
6436 }
6437
6438 /* C6 - NCRN is set to 4. */
6439 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6440
6441 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6442 return;
6443 }
6444
6445 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6446 for a call to a function whose data type is FNTYPE.
6447 For a library call, FNTYPE is NULL. */
6448 void
6449 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6450 rtx libname,
6451 tree fndecl ATTRIBUTE_UNUSED)
6452 {
6453 /* Determine which PCS variant to use for this call. */
6454 if (fntype)
6455 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6456 else
6457 pcum->pcs_variant = arm_pcs_default;
6458
6459 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6460 {
6461 if (arm_libcall_uses_aapcs_base (libname))
6462 pcum->pcs_variant = ARM_PCS_AAPCS;
6463
6464 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6465 pcum->aapcs_reg = NULL_RTX;
6466 pcum->aapcs_partial = 0;
6467 pcum->aapcs_arg_processed = false;
6468 pcum->aapcs_cprc_slot = -1;
6469 pcum->can_split = true;
6470
6471 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6472 {
6473 int i;
6474
6475 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6476 {
6477 pcum->aapcs_cprc_failed[i] = false;
6478 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6479 }
6480 }
6481 return;
6482 }
6483
6484 /* Legacy ABIs */
6485
6486 /* On the ARM, the offset starts at 0. */
6487 pcum->nregs = 0;
6488 pcum->iwmmxt_nregs = 0;
6489 pcum->can_split = true;
6490
6491 /* Varargs vectors are treated the same as long long.
6492 named_count avoids having to change the way arm handles 'named' */
6493 pcum->named_count = 0;
6494 pcum->nargs = 0;
6495
6496 if (TARGET_REALLY_IWMMXT && fntype)
6497 {
6498 tree fn_arg;
6499
6500 for (fn_arg = TYPE_ARG_TYPES (fntype);
6501 fn_arg;
6502 fn_arg = TREE_CHAIN (fn_arg))
6503 pcum->named_count += 1;
6504
6505 if (! pcum->named_count)
6506 pcum->named_count = INT_MAX;
6507 }
6508 }
6509
6510 /* Return 1 if double word alignment is required for argument passing.
6511 Return -1 if double word alignment used to be required for argument
6512 passing before PR77728 ABI fix, but is not required anymore.
6513 Return 0 if double word alignment is not required and wasn't required
6514 before either. */
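/* For example, 'double' and 'long long' arguments (64-bit natural
   alignment) return 1, 'int' returns 0, and 'struct { double d; }'
   returns 1 via the field scan below.  */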
6515 static int
6516 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6517 {
6518 if (!type)
6519 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6520
6521 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6522 if (!AGGREGATE_TYPE_P (type))
6523 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6524
6525 /* Array types: Use member alignment of element type. */
6526 if (TREE_CODE (type) == ARRAY_TYPE)
6527 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6528
6529 int ret = 0;
6530 /* Record/aggregate types: Use greatest member alignment of any member. */
6531 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6532 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6533 {
6534 if (TREE_CODE (field) == FIELD_DECL)
6535 return 1;
6536 else
6537 /* Before PR77728 fix, we were incorrectly considering also
6538 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6539 Make sure we can warn about that with -Wpsabi. */
6540 ret = -1;
6541 }
6542
6543 return ret;
6544 }
6545
6546
6547 /* Determine where to put an argument to a function.
6548 Value is zero to push the argument on the stack,
6549 or a hard register in which to store the argument.
6550
6551 MODE is the argument's machine mode.
6552 TYPE is the data type of the argument (as a tree).
6553 This is null for libcalls where that information may
6554 not be available.
6555 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6556 the preceding args and about the function being called.
6557 NAMED is nonzero if this argument is a named parameter
6558 (otherwise it is an extra parameter matching an ellipsis).
6559
6560 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6561 other arguments are passed on the stack. If (NAMED == 0) (which happens
6562 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6563 defined), say it is passed on the stack (function_prologue will
6564 indeed make it pass on the stack if necessary). */
6565
6566 static rtx
6567 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6568 const_tree type, bool named)
6569 {
6570 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6571 int nregs;
6572
6573 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6574 a call insn (op3 of a call_value insn). */
6575 if (mode == VOIDmode)
6576 return const0_rtx;
6577
6578 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6579 {
6580 aapcs_layout_arg (pcum, mode, type, named);
6581 return pcum->aapcs_reg;
6582 }
6583
6584 /* Varargs vectors are treated the same as long long.
6585 named_count avoids having to change the way arm handles 'named' */
6586 if (TARGET_IWMMXT_ABI
6587 && arm_vector_mode_supported_p (mode)
6588 && pcum->named_count > pcum->nargs + 1)
6589 {
6590 if (pcum->iwmmxt_nregs <= 9)
6591 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6592 else
6593 {
6594 pcum->can_split = false;
6595 return NULL_RTX;
6596 }
6597 }
6598
6599 /* Put doubleword aligned quantities in even register pairs. */
6600 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6601 {
6602 int res = arm_needs_doubleword_align (mode, type);
6603 if (res < 0 && warn_psabi)
6604 inform (input_location, "parameter passing for argument of type "
6605 "%qT changed in GCC 7.1", type);
6606 else if (res > 0)
6607 pcum->nregs++;
6608 }
6609
6610 /* Only allow splitting an arg between regs and memory if all preceding
6611 args were allocated to regs. For args passed by reference we only count
6612 the reference pointer. */
6613 if (pcum->can_split)
6614 nregs = 1;
6615 else
6616 nregs = ARM_NUM_REGS2 (mode, type);
6617
6618 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6619 return NULL_RTX;
6620
6621 return gen_rtx_REG (mode, pcum->nregs);
6622 }
6623
6624 static unsigned int
6625 arm_function_arg_boundary (machine_mode mode, const_tree type)
6626 {
6627 if (!ARM_DOUBLEWORD_ALIGN)
6628 return PARM_BOUNDARY;
6629
6630 int res = arm_needs_doubleword_align (mode, type);
6631 if (res < 0 && warn_psabi)
6632 inform (input_location, "parameter passing for argument of type %qT "
6633 "changed in GCC 7.1", type);
6634
6635 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6636 }
6637
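/* Implement TARGET_ARG_PARTIAL_BYTES: return the number of bytes of an
   argument that are passed in registers when the remainder of the
   argument goes on the stack.  For example, in the legacy ABI path an
   8-byte argument arriving when only r3 is still free gets 4 bytes in
   r3, so this returns 4.  */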
6638 static int
6639 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6640 tree type, bool named)
6641 {
6642 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6643 int nregs = pcum->nregs;
6644
6645 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6646 {
6647 aapcs_layout_arg (pcum, mode, type, named);
6648 return pcum->aapcs_partial;
6649 }
6650
6651 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6652 return 0;
6653
6654 if (NUM_ARG_REGS > nregs
6655 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6656 && pcum->can_split)
6657 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6658
6659 return 0;
6660 }
6661
6662 /* Update the data in PCUM to advance over an argument
6663 of mode MODE and data type TYPE.
6664 (TYPE is null for libcalls where that information may not be available.) */
6665
6666 static void
6667 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6668 const_tree type, bool named)
6669 {
6670 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6671
6672 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6673 {
6674 aapcs_layout_arg (pcum, mode, type, named);
6675
6676 if (pcum->aapcs_cprc_slot >= 0)
6677 {
6678 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6679 type);
6680 pcum->aapcs_cprc_slot = -1;
6681 }
6682
6683 /* Generic stuff. */
6684 pcum->aapcs_arg_processed = false;
6685 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6686 pcum->aapcs_reg = NULL_RTX;
6687 pcum->aapcs_partial = 0;
6688 }
6689 else
6690 {
6691 pcum->nargs += 1;
6692 if (arm_vector_mode_supported_p (mode)
6693 && pcum->named_count > pcum->nargs
6694 && TARGET_IWMMXT_ABI)
6695 pcum->iwmmxt_nregs += 1;
6696 else
6697 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6698 }
6699 }
6700
6701 /* Variable sized types are passed by reference. This is a GCC
6702 extension to the ARM ABI. */
6703
6704 static bool
6705 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6706 machine_mode mode ATTRIBUTE_UNUSED,
6707 const_tree type, bool named ATTRIBUTE_UNUSED)
6708 {
6709 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6710 }
6711 \f
6712 /* Encode the current state of the #pragma [no_]long_calls. */
6713 typedef enum
6714 {
6715 OFF, /* No #pragma [no_]long_calls is in effect. */
6716 LONG, /* #pragma long_calls is in effect. */
6717 SHORT /* #pragma no_long_calls is in effect. */
6718 } arm_pragma_enum;
6719
6720 static arm_pragma_enum arm_pragma_long_calls = OFF;
6721
6722 void
6723 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6724 {
6725 arm_pragma_long_calls = LONG;
6726 }
6727
6728 void
6729 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6730 {
6731 arm_pragma_long_calls = SHORT;
6732 }
6733
6734 void
6735 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6736 {
6737 arm_pragma_long_calls = OFF;
6738 }
6739 \f
6740 /* Handle an attribute requiring a FUNCTION_DECL;
6741 arguments as in struct attribute_spec.handler. */
6742 static tree
6743 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6744 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6745 {
6746 if (TREE_CODE (*node) != FUNCTION_DECL)
6747 {
6748 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6749 name);
6750 *no_add_attrs = true;
6751 }
6752
6753 return NULL_TREE;
6754 }
6755
6756 /* Handle an "interrupt" or "isr" attribute;
6757 arguments as in struct attribute_spec.handler. */
6758 static tree
6759 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6760 bool *no_add_attrs)
6761 {
6762 if (DECL_P (*node))
6763 {
6764 if (TREE_CODE (*node) != FUNCTION_DECL)
6765 {
6766 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6767 name);
6768 *no_add_attrs = true;
6769 }
6770 /* FIXME: the argument if any is checked for type attributes;
6771 should it be checked for decl ones? */
6772 }
6773 else
6774 {
6775 if (TREE_CODE (*node) == FUNCTION_TYPE
6776 || TREE_CODE (*node) == METHOD_TYPE)
6777 {
6778 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6779 {
6780 warning (OPT_Wattributes, "%qE attribute ignored",
6781 name);
6782 *no_add_attrs = true;
6783 }
6784 }
6785 else if (TREE_CODE (*node) == POINTER_TYPE
6786 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6787 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6788 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6789 {
6790 *node = build_variant_type_copy (*node);
6791 TREE_TYPE (*node) = build_type_attribute_variant
6792 (TREE_TYPE (*node),
6793 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6794 *no_add_attrs = true;
6795 }
6796 else
6797 {
6798 /* Possibly pass this attribute on from the type to a decl. */
6799 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6800 | (int) ATTR_FLAG_FUNCTION_NEXT
6801 | (int) ATTR_FLAG_ARRAY_NEXT))
6802 {
6803 *no_add_attrs = true;
6804 return tree_cons (name, args, NULL_TREE);
6805 }
6806 else
6807 {
6808 warning (OPT_Wattributes, "%qE attribute ignored",
6809 name);
6810 }
6811 }
6812 }
6813
6814 return NULL_TREE;
6815 }
6816
6817 /* Handle a "pcs" attribute; arguments as in struct
6818 attribute_spec.handler. */
6819 static tree
6820 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6821 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6822 {
6823 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6824 {
6825 warning (OPT_Wattributes, "%qE attribute ignored", name);
6826 *no_add_attrs = true;
6827 }
6828 return NULL_TREE;
6829 }
6830
6831 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6832 /* Handle the "notshared" attribute. This attribute is another way of
6833 requesting hidden visibility. ARM's compiler supports
6834 "__declspec(notshared)"; we support the same thing via an
6835 attribute. */
6836
6837 static tree
6838 arm_handle_notshared_attribute (tree *node,
6839 tree name ATTRIBUTE_UNUSED,
6840 tree args ATTRIBUTE_UNUSED,
6841 int flags ATTRIBUTE_UNUSED,
6842 bool *no_add_attrs)
6843 {
6844 tree decl = TYPE_NAME (*node);
6845
6846 if (decl)
6847 {
6848 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6849 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6850 *no_add_attrs = false;
6851 }
6852 return NULL_TREE;
6853 }
6854 #endif
6855
6856 /* This function returns true if a function with declaration FNDECL and type
6857 FNTYPE uses the stack to pass arguments or to return its value, and false
6858 otherwise. This is used for functions with the attributes
6859 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6860 diagnostic messages if the stack is used. NAME is the name of the attribute
6861 used. */
6862
6863 static bool
6864 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6865 {
6866 function_args_iterator args_iter;
6867 CUMULATIVE_ARGS args_so_far_v;
6868 cumulative_args_t args_so_far;
6869 bool first_param = true;
6870 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6871
6872 /* Error out if any argument is passed on the stack. */
6873 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6874 args_so_far = pack_cumulative_args (&args_so_far_v);
6875 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6876 {
6877 rtx arg_rtx;
6878 machine_mode arg_mode = TYPE_MODE (arg_type);
6879
6880 prev_arg_type = arg_type;
6881 if (VOID_TYPE_P (arg_type))
6882 continue;
6883
6884 if (!first_param)
6885 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6886 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6887 if (!arg_rtx
6888 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6889 {
6890 error ("%qE attribute not available to functions with arguments "
6891 "passed on the stack", name);
6892 return true;
6893 }
6894 first_param = false;
6895 }
6896
6897 /* Error out for variadic functions since we cannot control how many
6898 arguments will be passed and thus stack could be used. stdarg_p () is not
6899 used for the checking to avoid browsing arguments twice. */
6900 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6901 {
6902 error ("%qE attribute not available to functions with variable number "
6903 "of arguments", name);
6904 return true;
6905 }
6906
6907 /* Error out if return value is passed on the stack. */
6908 ret_type = TREE_TYPE (fntype);
6909 if (arm_return_in_memory (ret_type, fntype))
6910 {
6911 error ("%qE attribute not available to functions that return value on "
6912 "the stack", name);
6913 return true;
6914 }
6915 return false;
6916 }
6917
6918 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6919 function will check whether the attribute is allowed here and will add the
6920 attribute to the function declaration tree or otherwise issue a warning. */
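/* A typical (illustrative) use, accepted only when compiling with -mcmse:
     int __attribute__ ((cmse_nonsecure_entry)) secure_api (int);  */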
6921
6922 static tree
6923 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6924 tree /* args */,
6925 int /* flags */,
6926 bool *no_add_attrs)
6927 {
6928 tree fndecl;
6929
6930 if (!use_cmse)
6931 {
6932 *no_add_attrs = true;
6933 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6934 name);
6935 return NULL_TREE;
6936 }
6937
6938 /* Ignore the attribute for anything that is not a function declaration. */
6939 if (TREE_CODE (*node) != FUNCTION_DECL)
6940 {
6941 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6942 name);
6943 *no_add_attrs = true;
6944 return NULL_TREE;
6945 }
6946
6947 fndecl = *node;
6948
6949 /* Warn for static linkage functions. */
6950 if (!TREE_PUBLIC (fndecl))
6951 {
6952 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6953 "with static linkage", name);
6954 *no_add_attrs = true;
6955 return NULL_TREE;
6956 }
6957
6958 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6959 TREE_TYPE (fndecl));
6960 return NULL_TREE;
6961 }
6962
6963
6964 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6965 function will check whether the attribute is allowed here and will add the
6966 attribute to the function type tree or otherwise issue a diagnostic. The
6967 reason we check this at declaration time is to only allow the use of the
6968 attribute with declarations of function pointers and not function
6969 declarations. This function checks that NODE is of the expected type and
6970 otherwise issues diagnostics using NAME. If NODE is not of the expected
6971 type, *NO_ADD_ATTRS will be set to true. */
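/* Illustrative use on a function-pointer declaration (the form this handler
   accepts):
     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));
   Applying the attribute to a plain function declaration is diagnosed
   below.  */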
6972
6973 static tree
6974 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6975 tree /* args */,
6976 int /* flags */,
6977 bool *no_add_attrs)
6978 {
6979 tree decl = NULL_TREE, fntype = NULL_TREE;
6980 tree type;
6981
6982 if (!use_cmse)
6983 {
6984 *no_add_attrs = true;
6985 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6986 name);
6987 return NULL_TREE;
6988 }
6989
6990 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6991 {
6992 decl = *node;
6993 fntype = TREE_TYPE (decl);
6994 }
6995
6996 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6997 fntype = TREE_TYPE (fntype);
6998
6999 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7000 {
7001 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7002 "function pointer", name);
7003 *no_add_attrs = true;
7004 return NULL_TREE;
7005 }
7006
7007 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7008
7009 if (*no_add_attrs)
7010 return NULL_TREE;
7011
7012 /* Prevent trees being shared among function types with and without
7013 cmse_nonsecure_call attribute. */
7014 type = TREE_TYPE (decl);
7015
7016 type = build_distinct_type_copy (type);
7017 TREE_TYPE (decl) = type;
7018 fntype = type;
7019
7020 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7021 {
7022 type = fntype;
7023 fntype = TREE_TYPE (fntype);
7024 fntype = build_distinct_type_copy (fntype);
7025 TREE_TYPE (type) = fntype;
7026 }
7027
7028 /* Construct a type attribute and add it to the function type. */
7029 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7030 TYPE_ATTRIBUTES (fntype));
7031 TYPE_ATTRIBUTES (fntype) = attrs;
7032 return NULL_TREE;
7033 }
7034
7035 /* Return 0 if the attributes for two types are incompatible, 1 if they
7036 are compatible, and 2 if they are nearly compatible (which causes a
7037 warning to be generated). */
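/* For example, a function type carrying "long_call" is treated here as
   incompatible with one carrying "short_call" or with one carrying neither
   attribute; the same applies to mismatched "isr"/"interrupt" and
   "cmse_nonsecure_call" attributes.  */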
7038 static int
7039 arm_comp_type_attributes (const_tree type1, const_tree type2)
7040 {
7041 int l1, l2, s1, s2;
7042
7043 /* Check for mismatch of non-default calling convention. */
7044 if (TREE_CODE (type1) != FUNCTION_TYPE)
7045 return 1;
7046
7047 /* Check for mismatched call attributes. */
7048 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7049 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7050 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7051 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7052
7053 /* Only bother to check if an attribute is defined. */
7054 if (l1 | l2 | s1 | s2)
7055 {
7056 /* If one type has an attribute, the other must have the same attribute. */
7057 if ((l1 != l2) || (s1 != s2))
7058 return 0;
7059
7060 /* Disallow mixed attributes. */
7061 if ((l1 & s2) || (l2 & s1))
7062 return 0;
7063 }
7064
7065 /* Check for mismatched ISR attribute. */
7066 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7067 if (! l1)
7068 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7069 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7070 if (! l2)
7071 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7072 if (l1 != l2)
7073 return 0;
7074
7075 l1 = lookup_attribute ("cmse_nonsecure_call",
7076 TYPE_ATTRIBUTES (type1)) != NULL;
7077 l2 = lookup_attribute ("cmse_nonsecure_call",
7078 TYPE_ATTRIBUTES (type2)) != NULL;
7079
7080 if (l1 != l2)
7081 return 0;
7082
7083 return 1;
7084 }
7085
7086 /* Assigns default attributes to newly defined type. This is used to
7087 set short_call/long_call attributes for function types of
7088 functions defined inside corresponding #pragma scopes. */
7089 static void
7090 arm_set_default_type_attributes (tree type)
7091 {
7092 /* Add __attribute__ ((long_call)) to all functions when inside
7093 #pragma long_calls, or __attribute__ ((short_call)) when inside
7094 #pragma no_long_calls. */
7095 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7096 {
7097 tree type_attr_list, attr_name;
7098 type_attr_list = TYPE_ATTRIBUTES (type);
7099
7100 if (arm_pragma_long_calls == LONG)
7101 attr_name = get_identifier ("long_call");
7102 else if (arm_pragma_long_calls == SHORT)
7103 attr_name = get_identifier ("short_call");
7104 else
7105 return;
7106
7107 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7108 TYPE_ATTRIBUTES (type) = type_attr_list;
7109 }
7110 }
7111 \f
7112 /* Return true if DECL is known to be linked into section SECTION. */
7113
7114 static bool
7115 arm_function_in_section_p (tree decl, section *section)
7116 {
7117 /* We can only be certain about the prevailing symbol definition. */
7118 if (!decl_binds_to_current_def_p (decl))
7119 return false;
7120
7121 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7122 if (!DECL_SECTION_NAME (decl))
7123 {
7124 /* Make sure that we will not create a unique section for DECL. */
7125 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7126 return false;
7127 }
7128
7129 return function_section (decl) == section;
7130 }
7131
7132 /* Return nonzero if a 32-bit "long_call" should be generated for
7133 a call from the current function to DECL. We generate a long_call
7134 if the function:
7135
7136 a. has an __attribute__ ((long_call))
7137 or b. is within the scope of a #pragma long_calls
7138 or c. the -mlong-calls command line switch has been specified
7139
7140 However we do not generate a long call if the function:
7141
7142 d. has an __attribute__ ((short_call))
7143 or e. is inside the scope of a #pragma no_long_calls
7144 or f. is defined in the same section as the current function. */
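/* For instance (illustrative), either of
     void far_func (void) __attribute__ ((long_call));
   or
     #pragma long_calls
   directs the compiler to use a full 32-bit call sequence (loading the
   callee's address into a register) unless one of the conditions d-f above
   applies.  */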
7145
7146 bool
7147 arm_is_long_call_p (tree decl)
7148 {
7149 tree attrs;
7150
7151 if (!decl)
7152 return TARGET_LONG_CALLS;
7153
7154 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7155 if (lookup_attribute ("short_call", attrs))
7156 return false;
7157
7158 /* For "f", be conservative, and only cater for cases in which the
7159 whole of the current function is placed in the same section. */
7160 if (!flag_reorder_blocks_and_partition
7161 && TREE_CODE (decl) == FUNCTION_DECL
7162 && arm_function_in_section_p (decl, current_function_section ()))
7163 return false;
7164
7165 if (lookup_attribute ("long_call", attrs))
7166 return true;
7167
7168 return TARGET_LONG_CALLS;
7169 }
7170
7171 /* Return nonzero if it is ok to make a tail-call to DECL; EXP is the call expression. */
7172 static bool
7173 arm_function_ok_for_sibcall (tree decl, tree exp)
7174 {
7175 unsigned long func_type;
7176
7177 if (cfun->machine->sibcall_blocked)
7178 return false;
7179
7180 /* Never tailcall something if we are generating code for Thumb-1. */
7181 if (TARGET_THUMB1)
7182 return false;
7183
7184 /* The PIC register is live on entry to VxWorks PLT entries, so we
7185 must make the call before restoring the PIC register. */
7186 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7187 return false;
7188
7189 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7190 may be used both as target of the call and base register for restoring
7191 the VFP registers. */
7192 if (TARGET_APCS_FRAME && TARGET_ARM
7193 && TARGET_HARD_FLOAT
7194 && decl && arm_is_long_call_p (decl))
7195 return false;
7196
7197 /* If we are interworking and the function is not declared static
7198 then we can't tail-call it unless we know that it exists in this
7199 compilation unit (since it might be a Thumb routine). */
7200 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7201 && !TREE_ASM_WRITTEN (decl))
7202 return false;
7203
7204 func_type = arm_current_func_type ();
7205 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7206 if (IS_INTERRUPT (func_type))
7207 return false;
7208
7209 /* ARMv8-M non-secure entry functions need to return with bxns, which is only
7210 generated for entry functions themselves. */
7211 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7212 return false;
7213
7214 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7215 this would complicate matters for later code generation. */
7216 if (TREE_CODE (exp) == CALL_EXPR)
7217 {
7218 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7219 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7220 return false;
7221 }
7222
7223 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7224 {
7225 /* Check that the return value locations are the same. For
7226 example, that we aren't returning a value from the sibling in
7227 a VFP register but then need to transfer it to a core
7228 register. */
7229 rtx a, b;
7230 tree decl_or_type = decl;
7231
7232 /* If it is an indirect function pointer, get the function type. */
7233 if (!decl)
7234 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7235
7236 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7237 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7238 cfun->decl, false);
7239 if (!rtx_equal_p (a, b))
7240 return false;
7241 }
7242
7243 /* Never tailcall if function may be called with a misaligned SP. */
7244 if (IS_STACKALIGN (func_type))
7245 return false;
7246
7247 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7248 references should become a NOP. Don't convert such calls into
7249 sibling calls. */
7250 if (TARGET_AAPCS_BASED
7251 && arm_abi == ARM_ABI_AAPCS
7252 && decl
7253 && DECL_WEAK (decl))
7254 return false;
7255
7256 /* We cannot do a tailcall for an indirect call by descriptor if all the
7257 argument registers are used because the only register left to load the
7258 address is IP and it will already contain the static chain. */
7259 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7260 {
7261 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7262 CUMULATIVE_ARGS cum;
7263 cumulative_args_t cum_v;
7264
7265 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7266 cum_v = pack_cumulative_args (&cum);
7267
7268 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7269 {
7270 tree type = TREE_VALUE (t);
7271 if (!VOID_TYPE_P (type))
7272 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7273 }
7274
7275 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7276 return false;
7277 }
7278
7279 /* Everything else is ok. */
7280 return true;
7281 }
7282
7283 \f
7284 /* Addressing mode support functions. */
7285
7286 /* Return nonzero if X is a legitimate immediate operand when compiling
7287 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7288 int
7289 legitimate_pic_operand_p (rtx x)
7290 {
7291 if (GET_CODE (x) == SYMBOL_REF
7292 || (GET_CODE (x) == CONST
7293 && GET_CODE (XEXP (x, 0)) == PLUS
7294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7295 return 0;
7296
7297 return 1;
7298 }
7299
7300 /* Record that the current function needs a PIC register. Initialize
7301 cfun->machine->pic_reg if we have not already done so. */
7302
7303 static void
7304 require_pic_register (void)
7305 {
7306 /* A lot of the logic here is made obscure by the fact that this
7307 routine gets called as part of the rtx cost estimation process.
7308 We don't want those calls to affect any assumptions about the real
7309 function; and further, we can't call entry_of_function() until we
7310 start the real expansion process. */
7311 if (!crtl->uses_pic_offset_table)
7312 {
7313 gcc_assert (can_create_pseudo_p ());
7314 if (arm_pic_register != INVALID_REGNUM
7315 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7316 {
7317 if (!cfun->machine->pic_reg)
7318 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7319
7320 /* Play games to avoid marking the function as needing pic
7321 if we are being called as part of the cost-estimation
7322 process. */
7323 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7324 crtl->uses_pic_offset_table = 1;
7325 }
7326 else
7327 {
7328 rtx_insn *seq, *insn;
7329
7330 if (!cfun->machine->pic_reg)
7331 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7332
7333 /* Play games to avoid marking the function as needing pic
7334 if we are being called as part of the cost-estimation
7335 process. */
7336 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7337 {
7338 crtl->uses_pic_offset_table = 1;
7339 start_sequence ();
7340
7341 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7342 && arm_pic_register > LAST_LO_REGNUM)
7343 emit_move_insn (cfun->machine->pic_reg,
7344 gen_rtx_REG (Pmode, arm_pic_register));
7345 else
7346 arm_load_pic_register (0UL);
7347
7348 seq = get_insns ();
7349 end_sequence ();
7350
7351 for (insn = seq; insn; insn = NEXT_INSN (insn))
7352 if (INSN_P (insn))
7353 INSN_LOCATION (insn) = prologue_location;
7354
7355 /* We can be called during expansion of PHI nodes, where
7356 we can't yet emit instructions directly in the final
7357 insn stream. Queue the insns on the entry edge, they will
7358 be committed after everything else is expanded. */
7359 insert_insn_on_edge (seq,
7360 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7361 }
7362 }
7363 }
7364 }
7365
7366 rtx
7367 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7368 {
7369 if (GET_CODE (orig) == SYMBOL_REF
7370 || GET_CODE (orig) == LABEL_REF)
7371 {
7372 if (reg == 0)
7373 {
7374 gcc_assert (can_create_pseudo_p ());
7375 reg = gen_reg_rtx (Pmode);
7376 }
7377
7378 /* VxWorks does not impose a fixed gap between segments; the run-time
7379 gap can be different from the object-file gap. We therefore can't
7380 use GOTOFF unless we are absolutely sure that the symbol is in the
7381 same segment as the GOT. Unfortunately, the flexibility of linker
7382 scripts means that we can't be sure of that in general, so assume
7383 that GOTOFF is never valid on VxWorks. */
7384 /* References to weak symbols cannot be resolved locally: they
7385 may be overridden by a non-weak definition at link time. */
7386 rtx_insn *insn;
7387 if ((GET_CODE (orig) == LABEL_REF
7388 || (GET_CODE (orig) == SYMBOL_REF
7389 && SYMBOL_REF_LOCAL_P (orig)
7390 && (SYMBOL_REF_DECL (orig)
7391 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7392 && NEED_GOT_RELOC
7393 && arm_pic_data_is_text_relative)
7394 insn = arm_pic_static_addr (orig, reg);
7395 else
7396 {
7397 rtx pat;
7398 rtx mem;
7399
7400 /* If this function doesn't have a pic register, create one now. */
7401 require_pic_register ();
7402
7403 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7404
7405 /* Make the MEM as close to a constant as possible. */
7406 mem = SET_SRC (pat);
7407 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7408 MEM_READONLY_P (mem) = 1;
7409 MEM_NOTRAP_P (mem) = 1;
7410
7411 insn = emit_insn (pat);
7412 }
7413
7414 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7415 by loop. */
7416 set_unique_reg_note (insn, REG_EQUAL, orig);
7417
7418 return reg;
7419 }
7420 else if (GET_CODE (orig) == CONST)
7421 {
7422 rtx base, offset;
7423
7424 if (GET_CODE (XEXP (orig, 0)) == PLUS
7425 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7426 return orig;
7427
7428 /* Handle the case where we have: const (UNSPEC_TLS). */
7429 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7430 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7431 return orig;
7432
7433 /* Handle the case where we have:
7434 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7435 CONST_INT. */
7436 if (GET_CODE (XEXP (orig, 0)) == PLUS
7437 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7438 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7439 {
7440 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7441 return orig;
7442 }
7443
7444 if (reg == 0)
7445 {
7446 gcc_assert (can_create_pseudo_p ());
7447 reg = gen_reg_rtx (Pmode);
7448 }
7449
7450 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7451
7452 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7453 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7454 base == reg ? 0 : reg);
7455
7456 if (CONST_INT_P (offset))
7457 {
7458 /* The base register doesn't really matter; we only want to
7459 test the index for the appropriate mode. */
7460 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7461 {
7462 gcc_assert (can_create_pseudo_p ());
7463 offset = force_reg (Pmode, offset);
7464 }
7465
7466 if (CONST_INT_P (offset))
7467 return plus_constant (Pmode, base, INTVAL (offset));
7468 }
7469
7470 if (GET_MODE_SIZE (mode) > 4
7471 && (GET_MODE_CLASS (mode) == MODE_INT
7472 || TARGET_SOFT_FLOAT))
7473 {
7474 emit_insn (gen_addsi3 (reg, base, offset));
7475 return reg;
7476 }
7477
7478 return gen_rtx_PLUS (Pmode, base, offset);
7479 }
7480
7481 return orig;
7482 }
7483
7484
7485 /* Find a spare register to use during the prolog of a function. */
7486
7487 static int
7488 thumb_find_work_register (unsigned long pushed_regs_mask)
7489 {
7490 int reg;
7491
7492 /* Check the argument registers first as these are call-used. The
7493 register allocation order means that sometimes r3 might be used
7494 but earlier argument registers might not, so check them all. */
7495 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7496 if (!df_regs_ever_live_p (reg))
7497 return reg;
7498
7499 /* Before going on to check the call-saved registers we can try a couple
7500 more ways of deducing that r3 is available. The first is when we are
7501 pushing anonymous arguments onto the stack and we have fewer than 4
7502 registers' worth of fixed arguments (*). In this case r3 will be part of
7503 the variable argument list and so we can be sure that it will be
7504 pushed right at the start of the function. Hence it will be available
7505 for the rest of the prologue.
7506 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7507 if (cfun->machine->uses_anonymous_args
7508 && crtl->args.pretend_args_size > 0)
7509 return LAST_ARG_REGNUM;
7510
7511 /* The other case is when we have fixed arguments but fewer than 4 registers'
7512 worth. In this case r3 might be used in the body of the function, but
7513 it is not being used to convey an argument into the function. In theory
7514 we could just check crtl->args.size to see how many bytes are
7515 being passed in argument registers, but it seems that it is unreliable.
7516 Sometimes it will have the value 0 when in fact arguments are being
7517 passed. (See testcase execute/20021111-1.c for an example). So we also
7518 check the args_info.nregs field as well. The problem with this field is
7519 that it makes no allowances for arguments that are passed to the
7520 function but which are not used. Hence we could miss an opportunity
7521 when a function has an unused argument in r3. But it is better to be
7522 safe than to be sorry. */
7523 if (! cfun->machine->uses_anonymous_args
7524 && crtl->args.size >= 0
7525 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7526 && (TARGET_AAPCS_BASED
7527 ? crtl->args.info.aapcs_ncrn < 4
7528 : crtl->args.info.nregs < 4))
7529 return LAST_ARG_REGNUM;
7530
7531 /* Otherwise look for a call-saved register that is going to be pushed. */
7532 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7533 if (pushed_regs_mask & (1 << reg))
7534 return reg;
7535
7536 if (TARGET_THUMB2)
7537 {
7538 /* Thumb-2 can use high regs. */
7539 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7540 if (pushed_regs_mask & (1 << reg))
7541 return reg;
7542 }
7543 /* Something went wrong - thumb_compute_save_reg_mask()
7544 should have arranged for a suitable register to be pushed. */
7545 gcc_unreachable ();
7546 }
7547
7548 static GTY(()) int pic_labelno;
7549
7550 /* Generate code to load the PIC register. In Thumb mode SAVED_REGS is the mask
7551 of registers pushed in the prologue, from which a scratch low register can be chosen if needed. */
7552
7553 void
7554 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7555 {
7556 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7557
7558 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7559 return;
7560
7561 gcc_assert (flag_pic);
7562
7563 pic_reg = cfun->machine->pic_reg;
7564 if (TARGET_VXWORKS_RTP)
7565 {
7566 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7567 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7568 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7569
7570 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7571
7572 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7573 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7574 }
7575 else
7576 {
7577 /* We use an UNSPEC rather than a LABEL_REF because this label
7578 never appears in the code stream. */
7579
7580 labelno = GEN_INT (pic_labelno++);
7581 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7582 l1 = gen_rtx_CONST (VOIDmode, l1);
7583
7584 /* On the ARM the PC register contains 'dot + 8' at the time of the
7585 addition, on the Thumb it is 'dot + 4'. */
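/* Roughly speaking (illustrative), the literal built here expands to
   _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8) in ARM mode (+ 4 in Thumb mode),
   so adding the value of the PC sampled at .LPICn yields the run-time
   address of the GOT.  */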
7586 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7587 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7588 UNSPEC_GOTSYM_OFF);
7589 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7590
7591 if (TARGET_32BIT)
7592 {
7593 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7594 }
7595 else /* TARGET_THUMB1 */
7596 {
7597 if (arm_pic_register != INVALID_REGNUM
7598 && REGNO (pic_reg) > LAST_LO_REGNUM)
7599 {
7600 /* We will have pushed the pic register, so we should always be
7601 able to find a work register. */
7602 pic_tmp = gen_rtx_REG (SImode,
7603 thumb_find_work_register (saved_regs));
7604 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7605 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7606 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7607 }
7608 else if (arm_pic_register != INVALID_REGNUM
7609 && arm_pic_register > LAST_LO_REGNUM
7610 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7611 {
7612 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7613 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7614 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7615 }
7616 else
7617 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7618 }
7619 }
7620
7621 /* Need to emit this whether or not we obey regdecls,
7622 since setjmp/longjmp can cause life info to screw up. */
7623 emit_use (pic_reg);
7624 }
7625
7626 /* Generate code to load the address of a static var when flag_pic is set. */
7627 static rtx_insn *
7628 arm_pic_static_addr (rtx orig, rtx reg)
7629 {
7630 rtx l1, labelno, offset_rtx;
7631
7632 gcc_assert (flag_pic);
7633
7634 /* We use an UNSPEC rather than a LABEL_REF because this label
7635 never appears in the code stream. */
7636 labelno = GEN_INT (pic_labelno++);
7637 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7638 l1 = gen_rtx_CONST (VOIDmode, l1);
7639
7640 /* On the ARM the PC register contains 'dot + 8' at the time of the
7641 addition, on the Thumb it is 'dot + 4'. */
7642 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7643 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7644 UNSPEC_SYMBOL_OFFSET);
7645 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7646
7647 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7648 }
7649
7650 /* Return nonzero if X is valid as an ARM state addressing register. */
7651 static int
7652 arm_address_register_rtx_p (rtx x, int strict_p)
7653 {
7654 int regno;
7655
7656 if (!REG_P (x))
7657 return 0;
7658
7659 regno = REGNO (x);
7660
7661 if (strict_p)
7662 return ARM_REGNO_OK_FOR_BASE_P (regno);
7663
7664 return (regno <= LAST_ARM_REGNUM
7665 || regno >= FIRST_PSEUDO_REGISTER
7666 || regno == FRAME_POINTER_REGNUM
7667 || regno == ARG_POINTER_REGNUM);
7668 }
7669
7670 /* Return TRUE if this rtx is the difference of a symbol and a label,
7671 and will reduce to a PC-relative relocation in the object file.
7672 Expressions like this can be left alone when generating PIC, rather
7673 than forced through the GOT. */
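/* E.g. (minus (symbol_ref "sym") (label_ref L)), which the assembler can
   emit as a PC-relative difference.  */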
7674 static int
7675 pcrel_constant_p (rtx x)
7676 {
7677 if (GET_CODE (x) == MINUS)
7678 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7679
7680 return FALSE;
7681 }
7682
7683 /* Return true if X will surely end up in an index register after the next
7684 splitting pass. */
7685 static bool
7686 will_be_in_index_register (const_rtx x)
7687 {
7688 /* arm.md: calculate_pic_address will split this into a register. */
7689 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7690 }
7691
7692 /* Return nonzero if X is a valid ARM state address operand. */
7693 int
7694 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7695 int strict_p)
7696 {
7697 bool use_ldrd;
7698 enum rtx_code code = GET_CODE (x);
7699
7700 if (arm_address_register_rtx_p (x, strict_p))
7701 return 1;
7702
7703 use_ldrd = (TARGET_LDRD
7704 && (mode == DImode || mode == DFmode));
7705
7706 if (code == POST_INC || code == PRE_DEC
7707 || ((code == PRE_INC || code == POST_DEC)
7708 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7709 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7710
7711 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7712 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7713 && GET_CODE (XEXP (x, 1)) == PLUS
7714 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7715 {
7716 rtx addend = XEXP (XEXP (x, 1), 1);
7717
7718 /* Don't allow ldrd post increment by register because it's hard
7719 to fixup invalid register choices. */
7720 if (use_ldrd
7721 && GET_CODE (x) == POST_MODIFY
7722 && REG_P (addend))
7723 return 0;
7724
7725 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7726 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7727 }
7728
7729 /* After reload constants split into minipools will have addresses
7730 from a LABEL_REF. */
7731 else if (reload_completed
7732 && (code == LABEL_REF
7733 || (code == CONST
7734 && GET_CODE (XEXP (x, 0)) == PLUS
7735 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7736 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7737 return 1;
7738
7739 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7740 return 0;
7741
7742 else if (code == PLUS)
7743 {
7744 rtx xop0 = XEXP (x, 0);
7745 rtx xop1 = XEXP (x, 1);
7746
7747 return ((arm_address_register_rtx_p (xop0, strict_p)
7748 && ((CONST_INT_P (xop1)
7749 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7750 || (!strict_p && will_be_in_index_register (xop1))))
7751 || (arm_address_register_rtx_p (xop1, strict_p)
7752 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7753 }
7754
7755 #if 0
7756 /* Reload currently can't handle MINUS, so disable this for now */
7757 else if (GET_CODE (x) == MINUS)
7758 {
7759 rtx xop0 = XEXP (x, 0);
7760 rtx xop1 = XEXP (x, 1);
7761
7762 return (arm_address_register_rtx_p (xop0, strict_p)
7763 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7764 }
7765 #endif
7766
7767 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7768 && code == SYMBOL_REF
7769 && CONSTANT_POOL_ADDRESS_P (x)
7770 && ! (flag_pic
7771 && symbol_mentioned_p (get_pool_constant (x))
7772 && ! pcrel_constant_p (get_pool_constant (x))))
7773 return 1;
7774
7775 return 0;
7776 }
7777
7778 /* Return true if we can avoid creating a constant pool entry for x. */
7779 static bool
7780 can_avoid_literal_pool_for_label_p (rtx x)
7781 {
7782 /* Normally we can assign constant values to target registers without
7783 the help of the constant pool. But there are cases where we have to
7784 use the constant pool, for example:
7785 1) assigning a label to a register;
7786 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7787 
7788 A constant pool access in the format:
7789 (set (reg r0) (mem (symbol_ref (".LC0"))))
7790 will cause the use of the literal pool (later in function arm_reorg).
7791 So here we mark such a format as invalid, so that the compiler
7792 will adjust it into:
7793 (set (reg r0) (symbol_ref (".LC0")))
7794 (set (reg r0) (mem (reg r0))).
7795 No extra register is required, and (mem (reg r0)) won't cause the use
7796 of literal pools. */
7797 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7798 && CONSTANT_POOL_ADDRESS_P (x))
7799 return 1;
7800 return 0;
7801 }
7802
7803
7804 /* Return nonzero if X is a valid Thumb-2 address operand. */
7805 static int
7806 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7807 {
7808 bool use_ldrd;
7809 enum rtx_code code = GET_CODE (x);
7810
7811 if (arm_address_register_rtx_p (x, strict_p))
7812 return 1;
7813
7814 use_ldrd = (TARGET_LDRD
7815 && (mode == DImode || mode == DFmode));
7816
7817 if (code == POST_INC || code == PRE_DEC
7818 || ((code == PRE_INC || code == POST_DEC)
7819 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7820 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7821
7822 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7823 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7824 && GET_CODE (XEXP (x, 1)) == PLUS
7825 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7826 {
7827 /* Thumb-2 only has autoincrement by constant. */
7828 rtx addend = XEXP (XEXP (x, 1), 1);
7829 HOST_WIDE_INT offset;
7830
7831 if (!CONST_INT_P (addend))
7832 return 0;
7833
7834 offset = INTVAL(addend);
7835 if (GET_MODE_SIZE (mode) <= 4)
7836 return (offset > -256 && offset < 256);
7837
7838 return (use_ldrd && offset > -1024 && offset < 1024
7839 && (offset & 3) == 0);
7840 }
7841
7842 /* After reload constants split into minipools will have addresses
7843 from a LABEL_REF. */
7844 else if (reload_completed
7845 && (code == LABEL_REF
7846 || (code == CONST
7847 && GET_CODE (XEXP (x, 0)) == PLUS
7848 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7849 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7850 return 1;
7851
7852 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7853 return 0;
7854
7855 else if (code == PLUS)
7856 {
7857 rtx xop0 = XEXP (x, 0);
7858 rtx xop1 = XEXP (x, 1);
7859
7860 return ((arm_address_register_rtx_p (xop0, strict_p)
7861 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7862 || (!strict_p && will_be_in_index_register (xop1))))
7863 || (arm_address_register_rtx_p (xop1, strict_p)
7864 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7865 }
7866
7867 else if (can_avoid_literal_pool_for_label_p (x))
7868 return 0;
7869
7870 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7871 && code == SYMBOL_REF
7872 && CONSTANT_POOL_ADDRESS_P (x)
7873 && ! (flag_pic
7874 && symbol_mentioned_p (get_pool_constant (x))
7875 && ! pcrel_constant_p (get_pool_constant (x))))
7876 return 1;
7877
7878 return 0;
7879 }
7880
7881 /* Return nonzero if INDEX is valid for an address index operand in
7882 ARM state. */
7883 static int
7884 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7885 int strict_p)
7886 {
7887 HOST_WIDE_INT range;
7888 enum rtx_code code = GET_CODE (index);
7889
7890 /* Standard coprocessor addressing modes. */
7891 if (TARGET_HARD_FLOAT
7892 && (mode == SFmode || mode == DFmode))
7893 return (code == CONST_INT && INTVAL (index) < 1024
7894 && INTVAL (index) > -1024
7895 && (INTVAL (index) & 3) == 0);
7896
7897 /* For quad modes, we restrict the constant offset to be slightly less
7898 than what the instruction format permits. We do this because for
7899 quad mode moves, we will actually decompose them into two separate
7900 double-mode reads or writes. INDEX must therefore be a valid
7901 (double-mode) offset and so should INDEX+8. */
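/* For example, with the double-mode limit of +1020 enforced below, the
   largest quad-mode offset accepted is 1012: 1012 + 8 = 1020 still fits,
   whereas 1016 would push the second half to 1024.  */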
7902 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7903 return (code == CONST_INT
7904 && INTVAL (index) < 1016
7905 && INTVAL (index) > -1024
7906 && (INTVAL (index) & 3) == 0);
7907
7908 /* We have no such constraint on double mode offsets, so we permit the
7909 full range of the instruction format. */
7910 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7911 return (code == CONST_INT
7912 && INTVAL (index) < 1024
7913 && INTVAL (index) > -1024
7914 && (INTVAL (index) & 3) == 0);
7915
7916 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7917 return (code == CONST_INT
7918 && INTVAL (index) < 1024
7919 && INTVAL (index) > -1024
7920 && (INTVAL (index) & 3) == 0);
7921
7922 if (arm_address_register_rtx_p (index, strict_p)
7923 && (GET_MODE_SIZE (mode) <= 4))
7924 return 1;
7925
7926 if (mode == DImode || mode == DFmode)
7927 {
7928 if (code == CONST_INT)
7929 {
7930 HOST_WIDE_INT val = INTVAL (index);
7931
7932 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7933 If vldr is selected it uses arm_coproc_mem_operand. */
7934 if (TARGET_LDRD)
7935 return val > -256 && val < 256;
7936 else
7937 return val > -4096 && val < 4092;
7938 }
7939
7940 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7941 }
7942
7943 if (GET_MODE_SIZE (mode) <= 4
7944 && ! (arm_arch4
7945 && (mode == HImode
7946 || mode == HFmode
7947 || (mode == QImode && outer == SIGN_EXTEND))))
7948 {
7949 if (code == MULT)
7950 {
7951 rtx xiop0 = XEXP (index, 0);
7952 rtx xiop1 = XEXP (index, 1);
7953
7954 return ((arm_address_register_rtx_p (xiop0, strict_p)
7955 && power_of_two_operand (xiop1, SImode))
7956 || (arm_address_register_rtx_p (xiop1, strict_p)
7957 && power_of_two_operand (xiop0, SImode)));
7958 }
7959 else if (code == LSHIFTRT || code == ASHIFTRT
7960 || code == ASHIFT || code == ROTATERT)
7961 {
7962 rtx op = XEXP (index, 1);
7963
7964 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7965 && CONST_INT_P (op)
7966 && INTVAL (op) > 0
7967 && INTVAL (op) <= 31);
7968 }
7969 }
7970
7971 /* For ARM v4 we may be doing a sign-extend operation during the
7972 load. */
7973 if (arm_arch4)
7974 {
7975 if (mode == HImode
7976 || mode == HFmode
7977 || (outer == SIGN_EXTEND && mode == QImode))
7978 range = 256;
7979 else
7980 range = 4096;
7981 }
7982 else
7983 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7984
7985 return (code == CONST_INT
7986 && INTVAL (index) < range
7987 && INTVAL (index) > -range);
7988 }
7989
7990 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7991 index operand, i.e. 1, 2, 4 or 8. */
7992 static bool
7993 thumb2_index_mul_operand (rtx op)
7994 {
7995 HOST_WIDE_INT val;
7996
7997 if (!CONST_INT_P (op))
7998 return false;
7999
8000 val = INTVAL(op);
8001 return (val == 1 || val == 2 || val == 4 || val == 8);
8002 }
8003
8004 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8005 static int
8006 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8007 {
8008 enum rtx_code code = GET_CODE (index);
8009
8010 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8011 /* Standard coprocessor addressing modes. */
8012 if (TARGET_HARD_FLOAT
8013 && (mode == SFmode || mode == DFmode))
8014 return (code == CONST_INT && INTVAL (index) < 1024
8015 /* Thumb-2 allows only > -256 index range for its core register
8016 load/stores. Since we allow SF/DF in core registers, we have
8017 to use the intersection between -256~4096 (core) and -1024~1024
8018 (coprocessor). */
8019 && INTVAL (index) > -256
8020 && (INTVAL (index) & 3) == 0);
8021
8022 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8023 {
8024 /* For DImode assume values will usually live in core regs
8025 and only allow LDRD addressing modes. */
8026 if (!TARGET_LDRD || mode != DImode)
8027 return (code == CONST_INT
8028 && INTVAL (index) < 1024
8029 && INTVAL (index) > -1024
8030 && (INTVAL (index) & 3) == 0);
8031 }
8032
8033 /* For quad modes, we restrict the constant offset to be slightly less
8034 than what the instruction format permits. We do this because for
8035 quad mode moves, we will actually decompose them into two separate
8036 double-mode reads or writes. INDEX must therefore be a valid
8037 (double-mode) offset and so should INDEX+8. */
8038 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8039 return (code == CONST_INT
8040 && INTVAL (index) < 1016
8041 && INTVAL (index) > -1024
8042 && (INTVAL (index) & 3) == 0);
8043
8044 /* We have no such constraint on double mode offsets, so we permit the
8045 full range of the instruction format. */
8046 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8047 return (code == CONST_INT
8048 && INTVAL (index) < 1024
8049 && INTVAL (index) > -1024
8050 && (INTVAL (index) & 3) == 0);
8051
8052 if (arm_address_register_rtx_p (index, strict_p)
8053 && (GET_MODE_SIZE (mode) <= 4))
8054 return 1;
8055
8056 if (mode == DImode || mode == DFmode)
8057 {
8058 if (code == CONST_INT)
8059 {
8060 HOST_WIDE_INT val = INTVAL (index);
8061 /* Thumb-2 ldrd only has reg+const addressing modes.
8062 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8063 If vldr is selected it uses arm_coproc_mem_operand. */
8064 if (TARGET_LDRD)
8065 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8066 else
8067 return IN_RANGE (val, -255, 4095 - 4);
8068 }
8069 else
8070 return 0;
8071 }
8072
8073 if (code == MULT)
8074 {
8075 rtx xiop0 = XEXP (index, 0);
8076 rtx xiop1 = XEXP (index, 1);
8077
8078 return ((arm_address_register_rtx_p (xiop0, strict_p)
8079 && thumb2_index_mul_operand (xiop1))
8080 || (arm_address_register_rtx_p (xiop1, strict_p)
8081 && thumb2_index_mul_operand (xiop0)));
8082 }
8083 else if (code == ASHIFT)
8084 {
8085 rtx op = XEXP (index, 1);
8086
8087 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8088 && CONST_INT_P (op)
8089 && INTVAL (op) > 0
8090 && INTVAL (op) <= 3);
8091 }
8092
8093 return (code == CONST_INT
8094 && INTVAL (index) < 4096
8095 && INTVAL (index) > -256);
8096 }
8097
8098 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8099 static int
8100 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8101 {
8102 int regno;
8103
8104 if (!REG_P (x))
8105 return 0;
8106
8107 regno = REGNO (x);
8108
8109 if (strict_p)
8110 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8111
8112 return (regno <= LAST_LO_REGNUM
8113 || regno > LAST_VIRTUAL_REGISTER
8114 || regno == FRAME_POINTER_REGNUM
8115 || (GET_MODE_SIZE (mode) >= 4
8116 && (regno == STACK_POINTER_REGNUM
8117 || regno >= FIRST_PSEUDO_REGISTER
8118 || x == hard_frame_pointer_rtx
8119 || x == arg_pointer_rtx)));
8120 }
8121
8122 /* Return nonzero if x is a legitimate index register. This is the case
8123 for any base register that can access a QImode object. */
8124 inline static int
8125 thumb1_index_register_rtx_p (rtx x, int strict_p)
8126 {
8127 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8128 }
8129
8130 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8131
8132 The AP may be eliminated to either the SP or the FP, so we use the
8133 least common denominator, e.g. SImode, and offsets from 0 to 64.
8134
8135 ??? Verify whether the above is the right approach.
8136
8137 ??? Also, the FP may be eliminated to the SP, so perhaps that
8138 needs special handling also.
8139
8140 ??? Look at how the mips16 port solves this problem. It probably uses
8141 better ways to solve some of these problems.
8142
8143 Although it is not incorrect, we don't accept QImode and HImode
8144 addresses based on the frame pointer or arg pointer until the
8145 reload pass starts. This is so that eliminating such addresses
8146 into stack based ones won't produce impossible code. */
8147 int
8148 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8149 {
8150 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8151 return 0;
8152
8153 /* ??? Not clear if this is right. Experiment. */
8154 if (GET_MODE_SIZE (mode) < 4
8155 && !(reload_in_progress || reload_completed)
8156 && (reg_mentioned_p (frame_pointer_rtx, x)
8157 || reg_mentioned_p (arg_pointer_rtx, x)
8158 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8159 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8160 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8161 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8162 return 0;
8163
8164 /* Accept any base register. SP only in SImode or larger. */
8165 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8166 return 1;
8167
8168 /* This is PC relative data before arm_reorg runs. */
8169 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8170 && GET_CODE (x) == SYMBOL_REF
8171 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8172 return 1;
8173
8174 /* This is PC relative data after arm_reorg runs. */
8175 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8176 && reload_completed
8177 && (GET_CODE (x) == LABEL_REF
8178 || (GET_CODE (x) == CONST
8179 && GET_CODE (XEXP (x, 0)) == PLUS
8180 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8181 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8182 return 1;
8183
8184 /* Post-inc indexing only supported for SImode and larger. */
8185 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8186 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8187 return 1;
8188
8189 else if (GET_CODE (x) == PLUS)
8190 {
8191 /* REG+REG address can be any two index registers. */
8192 /* We disallow FRAME+REG addressing since we know that FRAME
8193 will be replaced with STACK, and SP relative addressing only
8194 permits SP+OFFSET. */
8195 if (GET_MODE_SIZE (mode) <= 4
8196 && XEXP (x, 0) != frame_pointer_rtx
8197 && XEXP (x, 1) != frame_pointer_rtx
8198 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8199 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8200 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8201 return 1;
8202
8203 /* REG+const has 5-7 bit offset for non-SP registers. */
8204 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8205 || XEXP (x, 0) == arg_pointer_rtx)
8206 && CONST_INT_P (XEXP (x, 1))
8207 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8208 return 1;
8209
8210 /* REG+const has 10-bit offset for SP, but only SImode and
8211 larger is supported. */
8212 /* ??? Should probably check for DI/DFmode overflow here
8213 just like GO_IF_LEGITIMATE_OFFSET does. */
8214 else if (REG_P (XEXP (x, 0))
8215 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8216 && GET_MODE_SIZE (mode) >= 4
8217 && CONST_INT_P (XEXP (x, 1))
8218 && INTVAL (XEXP (x, 1)) >= 0
8219 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8220 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8221 return 1;
8222
8223 else if (REG_P (XEXP (x, 0))
8224 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8225 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8226 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8227 && REGNO (XEXP (x, 0))
8228 <= LAST_VIRTUAL_POINTER_REGISTER))
8229 && GET_MODE_SIZE (mode) >= 4
8230 && CONST_INT_P (XEXP (x, 1))
8231 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8232 return 1;
8233 }
8234
8235 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8236 && GET_MODE_SIZE (mode) == 4
8237 && GET_CODE (x) == SYMBOL_REF
8238 && CONSTANT_POOL_ADDRESS_P (x)
8239 && ! (flag_pic
8240 && symbol_mentioned_p (get_pool_constant (x))
8241 && ! pcrel_constant_p (get_pool_constant (x))))
8242 return 1;
8243
8244 return 0;
8245 }
8246
8247 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8248 instruction of mode MODE. */
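/* These ranges correspond to the Thumb-1 5-bit scaled immediate forms:
   ldrb/strb allow offsets 0-31, ldrh/strh 0-62 in steps of 2, and ldr/str
   0-124 in steps of 4 (larger modes must keep the whole access within
   128 bytes, as checked below).  */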
8249 int
8250 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8251 {
8252 switch (GET_MODE_SIZE (mode))
8253 {
8254 case 1:
8255 return val >= 0 && val < 32;
8256
8257 case 2:
8258 return val >= 0 && val < 64 && (val & 1) == 0;
8259
8260 default:
8261 return (val >= 0
8262 && (val + GET_MODE_SIZE (mode)) <= 128
8263 && (val & 3) == 0);
8264 }
8265 }
8266
8267 bool
8268 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8269 {
8270 if (TARGET_ARM)
8271 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8272 else if (TARGET_THUMB2)
8273 return thumb2_legitimate_address_p (mode, x, strict_p);
8274 else /* if (TARGET_THUMB1) */
8275 return thumb1_legitimate_address_p (mode, x, strict_p);
8276 }
8277
8278 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8279
8280 Given an rtx X being reloaded into a reg required to be
8281 in class CLASS, return the class of reg to actually use.
8282 In general this is just CLASS, but for the Thumb core registers and
8283 immediate constants we prefer a LO_REGS class or a subset. */
8284
8285 static reg_class_t
8286 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8287 {
8288 if (TARGET_32BIT)
8289 return rclass;
8290 else
8291 {
8292 if (rclass == GENERAL_REGS)
8293 return LO_REGS;
8294 else
8295 return rclass;
8296 }
8297 }
8298
8299 /* Build the SYMBOL_REF for __tls_get_addr. */
8300
8301 static GTY(()) rtx tls_get_addr_libfunc;
8302
8303 static rtx
8304 get_tls_get_addr (void)
8305 {
8306 if (!tls_get_addr_libfunc)
8307 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8308 return tls_get_addr_libfunc;
8309 }
8310
8311 rtx
8312 arm_load_tp (rtx target)
8313 {
8314 if (!target)
8315 target = gen_reg_rtx (SImode);
8316
8317 if (TARGET_HARD_TP)
8318 {
8319 /* Can return in any reg. */
8320 emit_insn (gen_load_tp_hard (target));
8321 }
8322 else
8323 {
8324 /* Always returned in r0. Immediately copy the result into a pseudo,
8325 otherwise other uses of r0 (e.g. setting up function arguments) may
8326 clobber the value. */
8327
8328 rtx tmp;
8329
8330 emit_insn (gen_load_tp_soft ());
8331
8332 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8333 emit_move_insn (target, tmp);
8334 }
8335 return target;
8336 }
8337
8338 static rtx
8339 load_tls_operand (rtx x, rtx reg)
8340 {
8341 rtx tmp;
8342
8343 if (reg == NULL_RTX)
8344 reg = gen_reg_rtx (SImode);
8345
8346 tmp = gen_rtx_CONST (SImode, x);
8347
8348 emit_move_insn (reg, tmp);
8349
8350 return reg;
8351 }
8352
8353 static rtx_insn *
8354 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8355 {
8356 rtx label, labelno, sum;
8357
8358 gcc_assert (reloc != TLS_DESCSEQ);
8359 start_sequence ();
8360
8361 labelno = GEN_INT (pic_labelno++);
8362 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8363 label = gen_rtx_CONST (VOIDmode, label);
8364
8365 sum = gen_rtx_UNSPEC (Pmode,
8366 gen_rtvec (4, x, GEN_INT (reloc), label,
8367 GEN_INT (TARGET_ARM ? 8 : 4)),
8368 UNSPEC_TLS);
8369 reg = load_tls_operand (sum, reg);
8370
8371 if (TARGET_ARM)
8372 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8373 else
8374 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8375
8376 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8377 LCT_PURE, /* LCT_CONST? */
8378 Pmode, reg, Pmode);
8379
8380 rtx_insn *insns = get_insns ();
8381 end_sequence ();
8382
8383 return insns;
8384 }
8385
8386 static rtx
8387 arm_tls_descseq_addr (rtx x, rtx reg)
8388 {
8389 rtx labelno = GEN_INT (pic_labelno++);
8390 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8391 rtx sum = gen_rtx_UNSPEC (Pmode,
8392 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8393 gen_rtx_CONST (VOIDmode, label),
8394 GEN_INT (!TARGET_ARM)),
8395 UNSPEC_TLS);
8396 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8397
8398 emit_insn (gen_tlscall (x, labelno));
8399 if (!reg)
8400 reg = gen_reg_rtx (SImode);
8401 else
8402 gcc_assert (REGNO (reg) != R0_REGNUM);
8403
8404 emit_move_insn (reg, reg0);
8405
8406 return reg;
8407 }
8408
8409 rtx
8410 legitimize_tls_address (rtx x, rtx reg)
8411 {
8412 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8413 rtx_insn *insns;
8414 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8415
8416 switch (model)
8417 {
8418 case TLS_MODEL_GLOBAL_DYNAMIC:
8419 if (TARGET_GNU2_TLS)
8420 {
8421 reg = arm_tls_descseq_addr (x, reg);
8422
8423 tp = arm_load_tp (NULL_RTX);
8424
8425 dest = gen_rtx_PLUS (Pmode, tp, reg);
8426 }
8427 else
8428 {
8429 /* Original scheme */
8430 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8431 dest = gen_reg_rtx (Pmode);
8432 emit_libcall_block (insns, dest, ret, x);
8433 }
8434 return dest;
8435
8436 case TLS_MODEL_LOCAL_DYNAMIC:
8437 if (TARGET_GNU2_TLS)
8438 {
8439 reg = arm_tls_descseq_addr (x, reg);
8440
8441 tp = arm_load_tp (NULL_RTX);
8442
8443 dest = gen_rtx_PLUS (Pmode, tp, reg);
8444 }
8445 else
8446 {
8447 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8448
8449 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8450 share the LDM result with other LD model accesses. */
8451 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8452 UNSPEC_TLS);
8453 dest = gen_reg_rtx (Pmode);
8454 emit_libcall_block (insns, dest, ret, eqv);
8455
8456 /* Load the addend. */
8457 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8458 GEN_INT (TLS_LDO32)),
8459 UNSPEC_TLS);
8460 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8461 dest = gen_rtx_PLUS (Pmode, dest, addend);
8462 }
8463 return dest;
8464
8465 case TLS_MODEL_INITIAL_EXEC:
8466 labelno = GEN_INT (pic_labelno++);
8467 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8468 label = gen_rtx_CONST (VOIDmode, label);
8469 sum = gen_rtx_UNSPEC (Pmode,
8470 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8471 GEN_INT (TARGET_ARM ? 8 : 4)),
8472 UNSPEC_TLS);
8473 reg = load_tls_operand (sum, reg);
8474
8475 if (TARGET_ARM)
8476 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8477 else if (TARGET_THUMB2)
8478 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8479 else
8480 {
8481 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8482 emit_move_insn (reg, gen_const_mem (SImode, reg));
8483 }
8484
8485 tp = arm_load_tp (NULL_RTX);
8486
8487 return gen_rtx_PLUS (Pmode, tp, reg);
8488
8489 case TLS_MODEL_LOCAL_EXEC:
8490 tp = arm_load_tp (NULL_RTX);
8491
8492 reg = gen_rtx_UNSPEC (Pmode,
8493 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8494 UNSPEC_TLS);
8495 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8496
8497 return gen_rtx_PLUS (Pmode, tp, reg);
8498
8499 default:
8500 abort ();
8501 }
8502 }
8503
8504 /* Try machine-dependent ways of modifying an illegitimate address
8505 to be legitimate. If we find one, return the new, valid address. */
8506 rtx
8507 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8508 {
8509 if (arm_tls_referenced_p (x))
8510 {
8511 rtx addend = NULL;
8512
8513 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8514 {
8515 addend = XEXP (XEXP (x, 0), 1);
8516 x = XEXP (XEXP (x, 0), 0);
8517 }
8518
8519 if (GET_CODE (x) != SYMBOL_REF)
8520 return x;
8521
8522 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8523
8524 x = legitimize_tls_address (x, NULL_RTX);
8525
8526 if (addend)
8527 {
8528 x = gen_rtx_PLUS (SImode, x, addend);
8529 orig_x = x;
8530 }
8531 else
8532 return x;
8533 }
8534
8535 if (!TARGET_ARM)
8536 {
8537 /* TODO: legitimize_address for Thumb2. */
8538 if (TARGET_THUMB2)
8539 return x;
8540 return thumb_legitimize_address (x, orig_x, mode);
8541 }
8542
8543 if (GET_CODE (x) == PLUS)
8544 {
8545 rtx xop0 = XEXP (x, 0);
8546 rtx xop1 = XEXP (x, 1);
8547
8548 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8549 xop0 = force_reg (SImode, xop0);
8550
8551 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8552 && !symbol_mentioned_p (xop1))
8553 xop1 = force_reg (SImode, xop1);
8554
8555 if (ARM_BASE_REGISTER_RTX_P (xop0)
8556 && CONST_INT_P (xop1))
8557 {
8558 HOST_WIDE_INT n, low_n;
8559 rtx base_reg, val;
8560 n = INTVAL (xop1);
8561
8562 /* VFP addressing modes actually allow greater offsets, but for
8563 now we just stick with the lowest common denominator. */
8564 if (mode == DImode || mode == DFmode)
8565 {
8566 low_n = n & 0x0f;
8567 n &= ~0x0f;
8568 if (low_n > 4)
8569 {
8570 n += 16;
8571 low_n -= 16;
8572 }
8573 }
8574 else
8575 {
8576 low_n = ((mode) == TImode ? 0
8577 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8578 n -= low_n;
8579 }
8580
8581 base_reg = gen_reg_rtx (SImode);
8582 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8583 emit_move_insn (base_reg, val);
8584 x = plus_constant (Pmode, base_reg, low_n);
8585 }
8586 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8587 x = gen_rtx_PLUS (SImode, xop0, xop1);
8588 }
8589
8590 /* XXX We don't allow MINUS any more -- see comment in
8591 arm_legitimate_address_outer_p (). */
8592 else if (GET_CODE (x) == MINUS)
8593 {
8594 rtx xop0 = XEXP (x, 0);
8595 rtx xop1 = XEXP (x, 1);
8596
8597 if (CONSTANT_P (xop0))
8598 xop0 = force_reg (SImode, xop0);
8599
8600 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8601 xop1 = force_reg (SImode, xop1);
8602
8603 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8604 x = gen_rtx_MINUS (SImode, xop0, xop1);
8605 }
8606
8607 /* Make sure to take full advantage of the pre-indexed addressing mode
8608 with absolute addresses which often allows for the base register to
8609 be factorized for multiple adjacent memory references, and it might
8610 even allow for the minipool to be avoided entirely. */
8611 else if (CONST_INT_P (x) && optimize > 0)
8612 {
8613 unsigned int bits;
8614 HOST_WIDE_INT mask, base, index;
8615 rtx base_reg;
8616
8617 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8618 use an 8-bit index. So let's use a 12-bit index for SImode only and
8619 hope that arm_gen_constant will enable ldrb to use more bits. */
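/* For example (SImode): x = 0x12345 gives mask = 0xfff, base = 0x12000 and
   index = 0x345; the base is materialized in a register below and the load
   can then use the 12-bit immediate offset 0x345.  */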
8620 bits = (mode == SImode) ? 12 : 8;
8621 mask = (1 << bits) - 1;
8622 base = INTVAL (x) & ~mask;
8623 index = INTVAL (x) & mask;
8624 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8625 {
8626 /* It'll most probably be more efficient to generate the base
8627 with more bits set and use a negative index instead. */
8628 base |= mask;
8629 index -= mask;
8630 }
8631 base_reg = force_reg (SImode, GEN_INT (base));
8632 x = plus_constant (Pmode, base_reg, index);
8633 }
8634
8635 if (flag_pic)
8636 {
8637 /* We need to find and carefully transform any SYMBOL and LABEL
8638 references; so go back to the original address expression. */
8639 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8640
8641 if (new_x != orig_x)
8642 x = new_x;
8643 }
8644
8645 return x;
8646 }
8647
8648
8649 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8650 to be legitimate. If we find one, return the new, valid address. */
8651 rtx
8652 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8653 {
8654 if (GET_CODE (x) == PLUS
8655 && CONST_INT_P (XEXP (x, 1))
8656 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8657 || INTVAL (XEXP (x, 1)) < 0))
8658 {
8659 rtx xop0 = XEXP (x, 0);
8660 rtx xop1 = XEXP (x, 1);
8661 HOST_WIDE_INT offset = INTVAL (xop1);
8662
8663 /* Try and fold the offset into a biasing of the base register and
8664 then offsetting that. Don't do this when optimizing for space
8665 since it can cause too many CSEs. */
8666 if (optimize_size && offset >= 0
8667 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8668 {
8669 HOST_WIDE_INT delta;
8670
8671 if (offset >= 256)
8672 delta = offset - (256 - GET_MODE_SIZE (mode));
8673 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8674 delta = 31 * GET_MODE_SIZE (mode);
8675 else
8676 delta = offset & (~31 * GET_MODE_SIZE (mode));
8677
8678 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8679 NULL_RTX);
8680 x = plus_constant (Pmode, xop0, delta);
8681 }
8682 else if (offset < 0 && offset > -256)
8683 /* Small negative offsets are best done with a subtract before the
8684 dereference, since forcing these into a register normally takes two
8685 instructions. */
8686 x = force_operand (x, NULL_RTX);
8687 else
8688 {
8689 /* For the remaining cases, force the constant into a register. */
8690 xop1 = force_reg (SImode, xop1);
8691 x = gen_rtx_PLUS (SImode, xop0, xop1);
8692 }
8693 }
8694 else if (GET_CODE (x) == PLUS
8695 && s_register_operand (XEXP (x, 1), SImode)
8696 && !s_register_operand (XEXP (x, 0), SImode))
8697 {
8698 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8699
8700 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8701 }
8702
8703 if (flag_pic)
8704 {
8705 /* We need to find and carefully transform any SYMBOL and LABEL
8706 references; so go back to the original address expression. */
8707 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8708
8709 if (new_x != orig_x)
8710 x = new_x;
8711 }
8712
8713 return x;
8714 }
8715
8716 /* Return TRUE if X contains any TLS symbol references. */
8717
8718 bool
8719 arm_tls_referenced_p (rtx x)
8720 {
8721 if (! TARGET_HAVE_TLS)
8722 return false;
8723
8724 subrtx_iterator::array_type array;
8725 FOR_EACH_SUBRTX (iter, array, x, ALL)
8726 {
8727 const_rtx x = *iter;
8728 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8729 {
8730 /* ARM currently does not provide relocations to encode TLS variables
8731 into AArch32 instructions, only data, so there is currently no way
8732 to implement these if the literal pool is disabled.  */
8733 if (arm_disable_literal_pool)
8734 sorry ("accessing thread-local storage is not currently supported "
8735 "with -mpure-code or -mslow-flash-data");
8736
8737 return true;
8738 }
8739
8740 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8741 TLS offsets, not real symbol references. */
8742 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8743 iter.skip_subrtxes ();
8744 }
8745 return false;
8746 }
8747
8748 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8749
8750 On the ARM, allow any integer (invalid ones are removed later by insn
8751 patterns), nice doubles and symbol_refs which refer to the function's
8752 constant pool XXX.
8753
8754 When generating PIC, allow anything.  */
8755
8756 static bool
8757 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8758 {
8759 return flag_pic || !label_mentioned_p (x);
8760 }
8761
8762 static bool
8763 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8764 {
8765 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8766 RTXs.  These RTXs must therefore be allowed for Thumb-1 so that, when run
8767 for ARMv8-M Baseline or later, the result is valid.  */
8768 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8769 x = XEXP (x, 0);
8770
8771 return (CONST_INT_P (x)
8772 || CONST_DOUBLE_P (x)
8773 || CONSTANT_ADDRESS_P (x)
8774 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8775 || flag_pic);
8776 }
8777
8778 static bool
8779 arm_legitimate_constant_p (machine_mode mode, rtx x)
8780 {
8781 return (!arm_cannot_force_const_mem (mode, x)
8782 && (TARGET_32BIT
8783 ? arm_legitimate_constant_p_1 (mode, x)
8784 : thumb_legitimate_constant_p (mode, x)));
8785 }
8786
8787 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8788
8789 static bool
8790 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8791 {
8792 rtx base, offset;
8793
8794 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8795 {
8796 split_const (x, &base, &offset);
8797 if (GET_CODE (base) == SYMBOL_REF
8798 && !offset_within_block_p (base, INTVAL (offset)))
8799 return true;
8800 }
8801 return arm_tls_referenced_p (x);
8802 }
8803 \f
8804 #define REG_OR_SUBREG_REG(X) \
8805 (REG_P (X) \
8806 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8807
8808 #define REG_OR_SUBREG_RTX(X) \
8809 (REG_P (X) ? (X) : SUBREG_REG (X))
8810
8811 static inline int
8812 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8813 {
8814 machine_mode mode = GET_MODE (x);
8815 int total, words;
8816
8817 switch (code)
8818 {
8819 case ASHIFT:
8820 case ASHIFTRT:
8821 case LSHIFTRT:
8822 case ROTATERT:
8823 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8824
8825 case PLUS:
8826 case MINUS:
8827 case COMPARE:
8828 case NEG:
8829 case NOT:
8830 return COSTS_N_INSNS (1);
8831
8832 case MULT:
8833 if (arm_arch6m && arm_m_profile_small_mul)
8834 return COSTS_N_INSNS (32);
8835
8836 if (CONST_INT_P (XEXP (x, 1)))
8837 {
8838 int cycles = 0;
8839 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8840
8841 while (i)
8842 {
8843 i >>= 2;
8844 cycles++;
8845 }
8846 return COSTS_N_INSNS (2) + cycles;
8847 }
8848 return COSTS_N_INSNS (1) + 16;
8849
8850 case SET:
8851 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8852 the mode. */
8853 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8854 return (COSTS_N_INSNS (words)
8855 + 4 * ((MEM_P (SET_SRC (x)))
8856 + MEM_P (SET_DEST (x))));
8857
8858 case CONST_INT:
8859 if (outer == SET)
8860 {
8861 if (UINTVAL (x) < 256
8862 /* 16-bit constant. */
8863 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8864 return 0;
8865 if (thumb_shiftable_const (INTVAL (x)))
8866 return COSTS_N_INSNS (2);
8867 return COSTS_N_INSNS (3);
8868 }
8869 else if ((outer == PLUS || outer == COMPARE)
8870 && INTVAL (x) < 256 && INTVAL (x) > -256)
8871 return 0;
8872 else if ((outer == IOR || outer == XOR || outer == AND)
8873 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8874 return COSTS_N_INSNS (1);
8875 else if (outer == AND)
8876 {
8877 int i;
8878 /* This duplicates the tests in the andsi3 expander. */
8879 for (i = 9; i <= 31; i++)
8880 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8881 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8882 return COSTS_N_INSNS (2);
8883 }
8884 else if (outer == ASHIFT || outer == ASHIFTRT
8885 || outer == LSHIFTRT)
8886 return 0;
8887 return COSTS_N_INSNS (2);
8888
8889 case CONST:
8890 case CONST_DOUBLE:
8891 case LABEL_REF:
8892 case SYMBOL_REF:
8893 return COSTS_N_INSNS (3);
8894
8895 case UDIV:
8896 case UMOD:
8897 case DIV:
8898 case MOD:
8899 return 100;
8900
8901 case TRUNCATE:
8902 return 99;
8903
8904 case AND:
8905 case XOR:
8906 case IOR:
8907 /* XXX guess. */
8908 return 8;
8909
8910 case MEM:
8911 /* XXX another guess. */
8912 /* Memory costs quite a lot for the first word, but subsequent words
8913 load at the equivalent of a single insn each. */
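      /* E.g. (illustration) with UNITS_PER_WORD == 4, a DImode load
	 comes out as 10 + 4 * ((8 - 1) / UNITS_PER_WORD) == 14 on this
	 scale, plus 4 more if it is loaded from the constant pool.  */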
8914 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8915 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8916 ? 4 : 0));
8917
8918 case IF_THEN_ELSE:
8919 /* XXX a guess. */
8920 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8921 return 14;
8922 return 2;
8923
8924 case SIGN_EXTEND:
8925 case ZERO_EXTEND:
8926 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8927 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8928
8929 if (mode == SImode)
8930 return total;
8931
8932 if (arm_arch6)
8933 return total + COSTS_N_INSNS (1);
8934
8935 /* Assume a two-shift sequence. Increase the cost slightly so
8936 we prefer actual shifts over an extend operation. */
8937 return total + 1 + COSTS_N_INSNS (2);
8938
8939 default:
8940 return 99;
8941 }
8942 }
8943
8944 /* Estimates the size cost of thumb1 instructions.
8945 For now most of the code is copied from thumb1_rtx_costs. We need more
8946 fine-grained tuning when we have more related test cases.  */
8947 static inline int
8948 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8949 {
8950 machine_mode mode = GET_MODE (x);
8951 int words, cost;
8952
8953 switch (code)
8954 {
8955 case ASHIFT:
8956 case ASHIFTRT:
8957 case LSHIFTRT:
8958 case ROTATERT:
8959 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8960
8961 case PLUS:
8962 case MINUS:
8963 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8964 defined by RTL expansion, especially for the expansion of
8965 multiplication. */
8966 if ((GET_CODE (XEXP (x, 0)) == MULT
8967 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8968 || (GET_CODE (XEXP (x, 1)) == MULT
8969 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8970 return COSTS_N_INSNS (2);
8971 /* Fall through. */
8972 case COMPARE:
8973 case NEG:
8974 case NOT:
8975 return COSTS_N_INSNS (1);
8976
8977 case MULT:
8978 if (CONST_INT_P (XEXP (x, 1)))
8979 {
8980 /* The Thumb-1 mul instruction cannot operate on a constant.  We must
8981 load it into a register first.  */
8982 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8983 /* For targets that have a very small, high-latency multiply
8984 unit, we prefer to synthesize the mult with up to 5 instructions,
8985 giving a good balance between size and performance. */
8986 if (arm_arch6m && arm_m_profile_small_mul)
8987 return COSTS_N_INSNS (5);
8988 else
8989 return COSTS_N_INSNS (1) + const_size;
8990 }
8991 return COSTS_N_INSNS (1);
8992
8993 case SET:
8994 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8995 the mode. */
8996 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8997 cost = COSTS_N_INSNS (words);
8998 if (satisfies_constraint_J (SET_SRC (x))
8999 || satisfies_constraint_K (SET_SRC (x))
9000 /* An immediate too big for a 2-byte mov, so MOVT is used.  */
9001 || (CONST_INT_P (SET_SRC (x))
9002 && UINTVAL (SET_SRC (x)) >= 256
9003 && TARGET_HAVE_MOVT
9004 && satisfies_constraint_j (SET_SRC (x)))
9005 /* thumb1_movdi_insn. */
9006 || ((words > 1) && MEM_P (SET_SRC (x))))
9007 cost += COSTS_N_INSNS (1);
9008 return cost;
9009
9010 case CONST_INT:
9011 if (outer == SET)
9012 {
9013 if (UINTVAL (x) < 256)
9014 return COSTS_N_INSNS (1);
9015 /* movw is 4 bytes long.  */
9016 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9017 return COSTS_N_INSNS (2);
9018 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9019 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9020 return COSTS_N_INSNS (2);
9021 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9022 if (thumb_shiftable_const (INTVAL (x)))
9023 return COSTS_N_INSNS (2);
9024 return COSTS_N_INSNS (3);
9025 }
9026 else if ((outer == PLUS || outer == COMPARE)
9027 && INTVAL (x) < 256 && INTVAL (x) > -256)
9028 return 0;
9029 else if ((outer == IOR || outer == XOR || outer == AND)
9030 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9031 return COSTS_N_INSNS (1);
9032 else if (outer == AND)
9033 {
9034 int i;
9035 /* This duplicates the tests in the andsi3 expander. */
9036 for (i = 9; i <= 31; i++)
9037 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9038 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9039 return COSTS_N_INSNS (2);
9040 }
9041 else if (outer == ASHIFT || outer == ASHIFTRT
9042 || outer == LSHIFTRT)
9043 return 0;
9044 return COSTS_N_INSNS (2);
9045
9046 case CONST:
9047 case CONST_DOUBLE:
9048 case LABEL_REF:
9049 case SYMBOL_REF:
9050 return COSTS_N_INSNS (3);
9051
9052 case UDIV:
9053 case UMOD:
9054 case DIV:
9055 case MOD:
9056 return 100;
9057
9058 case TRUNCATE:
9059 return 99;
9060
9061 case AND:
9062 case XOR:
9063 case IOR:
9064 return COSTS_N_INSNS (1);
9065
9066 case MEM:
9067 return (COSTS_N_INSNS (1)
9068 + COSTS_N_INSNS (1)
9069 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9070 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9071 ? COSTS_N_INSNS (1) : 0));
9072
9073 case IF_THEN_ELSE:
9074 /* XXX a guess. */
9075 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9076 return 14;
9077 return 2;
9078
9079 case ZERO_EXTEND:
9080 /* XXX still guessing. */
9081 switch (GET_MODE (XEXP (x, 0)))
9082 {
9083 case E_QImode:
9084 return (1 + (mode == DImode ? 4 : 0)
9085 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9086
9087 case E_HImode:
9088 return (4 + (mode == DImode ? 4 : 0)
9089 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9090
9091 case E_SImode:
9092 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9093
9094 default:
9095 return 99;
9096 }
9097
9098 default:
9099 return 99;
9100 }
9101 }
9102
9103 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9104 operand, then return the operand that is being shifted. If the shift
9105 is not by a constant, then set SHIFT_REG to point to the operand.
9106 Return NULL if OP is not a shifter operand. */
9107 static rtx
9108 shifter_op_p (rtx op, rtx *shift_reg)
9109 {
9110 enum rtx_code code = GET_CODE (op);
9111
9112 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9113 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9114 return XEXP (op, 0);
9115 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9116 return XEXP (op, 0);
9117 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9118 || code == ASHIFTRT)
9119 {
9120 if (!CONST_INT_P (XEXP (op, 1)))
9121 *shift_reg = XEXP (op, 1);
9122 return XEXP (op, 0);
9123 }
9124
9125 return NULL;
9126 }
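/* For example (illustration), given (mult (reg R) (const_int 4)) this
   returns (reg R), since exact_log2 (4) == 2; given
   (ashift (reg R) (reg S)) it returns (reg R) and sets *SHIFT_REG to
   (reg S).  */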
9127
9128 static bool
9129 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9130 {
9131 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9132 rtx_code code = GET_CODE (x);
9133 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9134
9135 switch (XINT (x, 1))
9136 {
9137 case UNSPEC_UNALIGNED_LOAD:
9138 /* We can only do unaligned loads into the integer unit, and we can't
9139 use LDM or LDRD. */
9140 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9141 if (speed_p)
9142 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9143 + extra_cost->ldst.load_unaligned);
9144
9145 #ifdef NOT_YET
9146 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9147 ADDR_SPACE_GENERIC, speed_p);
9148 #endif
9149 return true;
9150
9151 case UNSPEC_UNALIGNED_STORE:
9152 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9153 if (speed_p)
9154 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9155 + extra_cost->ldst.store_unaligned);
9156
9157 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9158 #ifdef NOT_YET
9159 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9160 ADDR_SPACE_GENERIC, speed_p);
9161 #endif
9162 return true;
9163
9164 case UNSPEC_VRINTZ:
9165 case UNSPEC_VRINTP:
9166 case UNSPEC_VRINTM:
9167 case UNSPEC_VRINTR:
9168 case UNSPEC_VRINTX:
9169 case UNSPEC_VRINTA:
9170 if (speed_p)
9171 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9172
9173 return true;
9174 default:
9175 *cost = COSTS_N_INSNS (2);
9176 break;
9177 }
9178 return true;
9179 }
9180
9181 /* Cost of a libcall. We assume one insn per argument, an amount for the
9182 call (one insn for -Os) and then one for processing the result. */
9183 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
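/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   SPEED_P is true and to COSTS_N_INSNS (4) otherwise.  */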
9184
9185 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9186 do \
9187 { \
9188 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9189 if (shift_op != NULL \
9190 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9191 { \
9192 if (shift_reg) \
9193 { \
9194 if (speed_p) \
9195 *cost += extra_cost->alu.arith_shift_reg; \
9196 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9197 ASHIFT, 1, speed_p); \
9198 } \
9199 else if (speed_p) \
9200 *cost += extra_cost->alu.arith_shift; \
9201 \
9202 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9203 ASHIFT, 0, speed_p) \
9204 + rtx_cost (XEXP (x, 1 - IDX), \
9205 GET_MODE (shift_op), \
9206 OP, 1, speed_p)); \
9207 return true; \
9208 } \
9209 } \
9210 while (0);
9211
9212 /* RTX costs. Make an estimate of the cost of executing the operation
9213 X, which is contained within an operation with code OUTER_CODE.
9214 SPEED_P indicates whether the cost desired is the performance cost,
9215 or the size cost. The estimate is stored in COST and the return
9216 value is TRUE if the cost calculation is final, or FALSE if the
9217 caller should recurse through the operands of X to add additional
9218 costs.
9219
9220 We currently make no attempt to model the size savings of Thumb-2
9221 16-bit instructions. At the normal points in compilation where
9222 this code is called we have no measure of whether the condition
9223 flags are live or not, and thus no realistic way to determine what
9224 the size will eventually be. */
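/* An illustration of the contract above: returning FALSE for, say, a
   PLUS of two registers tells the generic rtx_cost machinery to recurse
   and add the operands' costs itself on top of whatever has already
   been accumulated in *COST.  */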
9225 static bool
9226 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9227 const struct cpu_cost_table *extra_cost,
9228 int *cost, bool speed_p)
9229 {
9230 machine_mode mode = GET_MODE (x);
9231
9232 *cost = COSTS_N_INSNS (1);
9233
9234 if (TARGET_THUMB1)
9235 {
9236 if (speed_p)
9237 *cost = thumb1_rtx_costs (x, code, outer_code);
9238 else
9239 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9240 return true;
9241 }
9242
9243 switch (code)
9244 {
9245 case SET:
9246 *cost = 0;
9247 /* SET RTXs don't have a mode so we get it from the destination. */
9248 mode = GET_MODE (SET_DEST (x));
9249
9250 if (REG_P (SET_SRC (x))
9251 && REG_P (SET_DEST (x)))
9252 {
9253 /* Assume that most copies can be done with a single insn,
9254 unless we don't have HW FP, in which case everything
9255 larger than word mode will require two insns. */
9256 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9257 && GET_MODE_SIZE (mode) > 4)
9258 || mode == DImode)
9259 ? 2 : 1);
9260 /* Conditional register moves can be encoded
9261 in 16 bits in Thumb mode. */
9262 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9263 *cost >>= 1;
9264
9265 return true;
9266 }
9267
9268 if (CONST_INT_P (SET_SRC (x)))
9269 {
9270 /* Handle CONST_INT here, since the value doesn't have a mode
9271 and we would otherwise be unable to work out the true cost. */
9272 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9273 0, speed_p);
9274 outer_code = SET;
9275 /* Slightly lower the cost of setting a core reg to a constant.
9276 This helps break up chains and allows for better scheduling. */
9277 if (REG_P (SET_DEST (x))
9278 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9279 *cost -= 1;
9280 x = SET_SRC (x);
9281 /* Immediate moves with an immediate in the range [0, 255] can be
9282 encoded in 16 bits in Thumb mode. */
9283 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9284 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9285 *cost >>= 1;
9286 goto const_int_cost;
9287 }
9288
9289 return false;
9290
9291 case MEM:
9292 /* A memory access costs 1 insn if the mode is small, or the address is
9293 a single register, otherwise it costs one insn per word. */
9294 if (REG_P (XEXP (x, 0)))
9295 *cost = COSTS_N_INSNS (1);
9296 else if (flag_pic
9297 && GET_CODE (XEXP (x, 0)) == PLUS
9298 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9299 /* This will be split into two instructions.
9300 See arm.md:calculate_pic_address. */
9301 *cost = COSTS_N_INSNS (2);
9302 else
9303 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9304
9305 /* For speed optimizations, add the costs of the address and
9306 accessing memory. */
9307 if (speed_p)
9308 #ifdef NOT_YET
9309 *cost += (extra_cost->ldst.load
9310 + arm_address_cost (XEXP (x, 0), mode,
9311 ADDR_SPACE_GENERIC, speed_p));
9312 #else
9313 *cost += extra_cost->ldst.load;
9314 #endif
9315 return true;
9316
9317 case PARALLEL:
9318 {
9319 /* Calculations of LDM costs are complex. We assume an initial cost
9320 (ldm_1st) which will load the number of registers mentioned in
9321 ldm_regs_per_insn_1st registers; then each additional
9322 ldm_regs_per_insn_subsequent registers cost one more insn. The
9323 formula for N regs is thus:
9324
9325 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9326 + ldm_regs_per_insn_subsequent - 1)
9327 / ldm_regs_per_insn_subsequent).
9328
9329 Additional costs may also be added for addressing. A similar
9330 formula is used for STM. */
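	/* Worked example with illustrative tuning values: if
	   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
	   a 5-register LDM costs
	   ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2)
	   == ldm_1st + COSTS_N_INSNS (2).  */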
9331
9332 bool is_ldm = load_multiple_operation (x, SImode);
9333 bool is_stm = store_multiple_operation (x, SImode);
9334
9335 if (is_ldm || is_stm)
9336 {
9337 if (speed_p)
9338 {
9339 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9340 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9341 ? extra_cost->ldst.ldm_regs_per_insn_1st
9342 : extra_cost->ldst.stm_regs_per_insn_1st;
9343 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9344 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9345 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9346
9347 *cost += regs_per_insn_1st
9348 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9349 + regs_per_insn_sub - 1)
9350 / regs_per_insn_sub);
9351 return true;
9352 }
9353
9354 }
9355 return false;
9356 }
9357 case DIV:
9358 case UDIV:
9359 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9360 && (mode == SFmode || !TARGET_VFP_SINGLE))
9361 *cost += COSTS_N_INSNS (speed_p
9362 ? extra_cost->fp[mode != SFmode].div : 0);
9363 else if (mode == SImode && TARGET_IDIV)
9364 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9365 else
9366 *cost = LIBCALL_COST (2);
9367
9368 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9369 are possible, udiv is preferred.  */
9370 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9371 return false; /* All arguments must be in registers. */
9372
9373 case MOD:
9374 /* MOD by a power of 2 can be expanded as:
9375 rsbs r1, r0, #0
9376 and r0, r0, #(n - 1)
9377 and r1, r1, #(n - 1)
9378 rsbpl r0, r1, #0. */
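      /* E.g. for x % 16 the expansion above becomes:
	   rsbs  r1, r0, #0
	   and   r0, r0, #15
	   and   r1, r1, #15
	   rsbpl r0, r1, #0.  */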
9379 if (CONST_INT_P (XEXP (x, 1))
9380 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9381 && mode == SImode)
9382 {
9383 *cost += COSTS_N_INSNS (3);
9384
9385 if (speed_p)
9386 *cost += 2 * extra_cost->alu.logical
9387 + extra_cost->alu.arith;
9388 return true;
9389 }
9390
9391 /* Fall-through. */
9392 case UMOD:
9393 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9394 are possible, udiv is preferred.  */
9395 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9396 return false; /* All arguments must be in registers. */
9397
9398 case ROTATE:
9399 if (mode == SImode && REG_P (XEXP (x, 1)))
9400 {
9401 *cost += (COSTS_N_INSNS (1)
9402 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9403 if (speed_p)
9404 *cost += extra_cost->alu.shift_reg;
9405 return true;
9406 }
9407 /* Fall through */
9408 case ROTATERT:
9409 case ASHIFT:
9410 case LSHIFTRT:
9411 case ASHIFTRT:
9412 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9413 {
9414 *cost += (COSTS_N_INSNS (2)
9415 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9416 if (speed_p)
9417 *cost += 2 * extra_cost->alu.shift;
9418 return true;
9419 }
9420 else if (mode == SImode)
9421 {
9422 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9423 /* Slightly disparage register shifts at -Os, but not by much. */
9424 if (!CONST_INT_P (XEXP (x, 1)))
9425 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9426 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9427 return true;
9428 }
9429 else if (GET_MODE_CLASS (mode) == MODE_INT
9430 && GET_MODE_SIZE (mode) < 4)
9431 {
9432 if (code == ASHIFT)
9433 {
9434 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9435 /* Slightly disparage register shifts at -Os, but not by
9436 much. */
9437 if (!CONST_INT_P (XEXP (x, 1)))
9438 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9439 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9440 }
9441 else if (code == LSHIFTRT || code == ASHIFTRT)
9442 {
9443 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9444 {
9445 /* Can use SBFX/UBFX. */
9446 if (speed_p)
9447 *cost += extra_cost->alu.bfx;
9448 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9449 }
9450 else
9451 {
9452 *cost += COSTS_N_INSNS (1);
9453 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9454 if (speed_p)
9455 {
9456 if (CONST_INT_P (XEXP (x, 1)))
9457 *cost += 2 * extra_cost->alu.shift;
9458 else
9459 *cost += (extra_cost->alu.shift
9460 + extra_cost->alu.shift_reg);
9461 }
9462 else
9463 /* Slightly disparage register shifts. */
9464 *cost += !CONST_INT_P (XEXP (x, 1));
9465 }
9466 }
9467 else /* Rotates. */
9468 {
9469 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9470 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9471 if (speed_p)
9472 {
9473 if (CONST_INT_P (XEXP (x, 1)))
9474 *cost += (2 * extra_cost->alu.shift
9475 + extra_cost->alu.log_shift);
9476 else
9477 *cost += (extra_cost->alu.shift
9478 + extra_cost->alu.shift_reg
9479 + extra_cost->alu.log_shift_reg);
9480 }
9481 }
9482 return true;
9483 }
9484
9485 *cost = LIBCALL_COST (2);
9486 return false;
9487
9488 case BSWAP:
9489 if (arm_arch6)
9490 {
9491 if (mode == SImode)
9492 {
9493 if (speed_p)
9494 *cost += extra_cost->alu.rev;
9495
9496 return false;
9497 }
9498 }
9499 else
9500 {
9501 /* No rev instruction available. Look at arm_legacy_rev
9502 and thumb_legacy_rev for the form of RTL used then. */
9503 if (TARGET_THUMB)
9504 {
9505 *cost += COSTS_N_INSNS (9);
9506
9507 if (speed_p)
9508 {
9509 *cost += 6 * extra_cost->alu.shift;
9510 *cost += 3 * extra_cost->alu.logical;
9511 }
9512 }
9513 else
9514 {
9515 *cost += COSTS_N_INSNS (4);
9516
9517 if (speed_p)
9518 {
9519 *cost += 2 * extra_cost->alu.shift;
9520 *cost += extra_cost->alu.arith_shift;
9521 *cost += 2 * extra_cost->alu.logical;
9522 }
9523 }
9524 return true;
9525 }
9526 return false;
9527
9528 case MINUS:
9529 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9530 && (mode == SFmode || !TARGET_VFP_SINGLE))
9531 {
9532 if (GET_CODE (XEXP (x, 0)) == MULT
9533 || GET_CODE (XEXP (x, 1)) == MULT)
9534 {
9535 rtx mul_op0, mul_op1, sub_op;
9536
9537 if (speed_p)
9538 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9539
9540 if (GET_CODE (XEXP (x, 0)) == MULT)
9541 {
9542 mul_op0 = XEXP (XEXP (x, 0), 0);
9543 mul_op1 = XEXP (XEXP (x, 0), 1);
9544 sub_op = XEXP (x, 1);
9545 }
9546 else
9547 {
9548 mul_op0 = XEXP (XEXP (x, 1), 0);
9549 mul_op1 = XEXP (XEXP (x, 1), 1);
9550 sub_op = XEXP (x, 0);
9551 }
9552
9553 /* The first operand of the multiply may be optionally
9554 negated. */
9555 if (GET_CODE (mul_op0) == NEG)
9556 mul_op0 = XEXP (mul_op0, 0);
9557
9558 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9559 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9560 + rtx_cost (sub_op, mode, code, 0, speed_p));
9561
9562 return true;
9563 }
9564
9565 if (speed_p)
9566 *cost += extra_cost->fp[mode != SFmode].addsub;
9567 return false;
9568 }
9569
9570 if (mode == SImode)
9571 {
9572 rtx shift_by_reg = NULL;
9573 rtx shift_op;
9574 rtx non_shift_op;
9575
9576 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9577 if (shift_op == NULL)
9578 {
9579 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9580 non_shift_op = XEXP (x, 0);
9581 }
9582 else
9583 non_shift_op = XEXP (x, 1);
9584
9585 if (shift_op != NULL)
9586 {
9587 if (shift_by_reg != NULL)
9588 {
9589 if (speed_p)
9590 *cost += extra_cost->alu.arith_shift_reg;
9591 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9592 }
9593 else if (speed_p)
9594 *cost += extra_cost->alu.arith_shift;
9595
9596 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9597 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9598 return true;
9599 }
9600
9601 if (arm_arch_thumb2
9602 && GET_CODE (XEXP (x, 1)) == MULT)
9603 {
9604 /* MLS. */
9605 if (speed_p)
9606 *cost += extra_cost->mult[0].add;
9607 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9608 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9609 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9610 return true;
9611 }
9612
9613 if (CONST_INT_P (XEXP (x, 0)))
9614 {
9615 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9616 INTVAL (XEXP (x, 0)), NULL_RTX,
9617 NULL_RTX, 1, 0);
9618 *cost = COSTS_N_INSNS (insns);
9619 if (speed_p)
9620 *cost += insns * extra_cost->alu.arith;
9621 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9622 return true;
9623 }
9624 else if (speed_p)
9625 *cost += extra_cost->alu.arith;
9626
9627 return false;
9628 }
9629
9630 if (GET_MODE_CLASS (mode) == MODE_INT
9631 && GET_MODE_SIZE (mode) < 4)
9632 {
9633 rtx shift_op, shift_reg;
9634 shift_reg = NULL;
9635
9636 /* We check both sides of the MINUS for shifter operands since,
9637 unlike PLUS, it's not commutative. */
9638
9639 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9640 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9641
9642 /* Slightly disparage, as we might need to widen the result. */
9643 *cost += 1;
9644 if (speed_p)
9645 *cost += extra_cost->alu.arith;
9646
9647 if (CONST_INT_P (XEXP (x, 0)))
9648 {
9649 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9650 return true;
9651 }
9652
9653 return false;
9654 }
9655
9656 if (mode == DImode)
9657 {
9658 *cost += COSTS_N_INSNS (1);
9659
9660 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9661 {
9662 rtx op1 = XEXP (x, 1);
9663
9664 if (speed_p)
9665 *cost += 2 * extra_cost->alu.arith;
9666
9667 if (GET_CODE (op1) == ZERO_EXTEND)
9668 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9669 0, speed_p);
9670 else
9671 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9672 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9673 0, speed_p);
9674 return true;
9675 }
9676 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9677 {
9678 if (speed_p)
9679 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9680 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9681 0, speed_p)
9682 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9683 return true;
9684 }
9685 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9686 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9687 {
9688 if (speed_p)
9689 *cost += (extra_cost->alu.arith
9690 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9691 ? extra_cost->alu.arith
9692 : extra_cost->alu.arith_shift));
9693 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9694 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9695 GET_CODE (XEXP (x, 1)), 0, speed_p));
9696 return true;
9697 }
9698
9699 if (speed_p)
9700 *cost += 2 * extra_cost->alu.arith;
9701 return false;
9702 }
9703
9704 /* Vector mode? */
9705
9706 *cost = LIBCALL_COST (2);
9707 return false;
9708
9709 case PLUS:
9710 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9711 && (mode == SFmode || !TARGET_VFP_SINGLE))
9712 {
9713 if (GET_CODE (XEXP (x, 0)) == MULT)
9714 {
9715 rtx mul_op0, mul_op1, add_op;
9716
9717 if (speed_p)
9718 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9719
9720 mul_op0 = XEXP (XEXP (x, 0), 0);
9721 mul_op1 = XEXP (XEXP (x, 0), 1);
9722 add_op = XEXP (x, 1);
9723
9724 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9725 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9726 + rtx_cost (add_op, mode, code, 0, speed_p));
9727
9728 return true;
9729 }
9730
9731 if (speed_p)
9732 *cost += extra_cost->fp[mode != SFmode].addsub;
9733 return false;
9734 }
9735 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9736 {
9737 *cost = LIBCALL_COST (2);
9738 return false;
9739 }
9740
9741 /* Narrow modes can be synthesized in SImode, but the range
9742 of useful sub-operations is limited. Check for shift operations
9743 on one of the operands. Only left shifts can be used in the
9744 narrow modes. */
9745 if (GET_MODE_CLASS (mode) == MODE_INT
9746 && GET_MODE_SIZE (mode) < 4)
9747 {
9748 rtx shift_op, shift_reg;
9749 shift_reg = NULL;
9750
9751 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9752
9753 if (CONST_INT_P (XEXP (x, 1)))
9754 {
9755 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9756 INTVAL (XEXP (x, 1)), NULL_RTX,
9757 NULL_RTX, 1, 0);
9758 *cost = COSTS_N_INSNS (insns);
9759 if (speed_p)
9760 *cost += insns * extra_cost->alu.arith;
9761 /* Slightly penalize a narrow operation as the result may
9762 need widening. */
9763 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9764 return true;
9765 }
9766
9767 /* Slightly penalize a narrow operation as the result may
9768 need widening. */
9769 *cost += 1;
9770 if (speed_p)
9771 *cost += extra_cost->alu.arith;
9772
9773 return false;
9774 }
9775
9776 if (mode == SImode)
9777 {
9778 rtx shift_op, shift_reg;
9779
9780 if (TARGET_INT_SIMD
9781 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9782 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9783 {
9784 /* UXTA[BH] or SXTA[BH]. */
9785 if (speed_p)
9786 *cost += extra_cost->alu.extend_arith;
9787 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9788 0, speed_p)
9789 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9790 return true;
9791 }
9792
9793 shift_reg = NULL;
9794 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9795 if (shift_op != NULL)
9796 {
9797 if (shift_reg)
9798 {
9799 if (speed_p)
9800 *cost += extra_cost->alu.arith_shift_reg;
9801 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9802 }
9803 else if (speed_p)
9804 *cost += extra_cost->alu.arith_shift;
9805
9806 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9807 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9808 return true;
9809 }
9810 if (GET_CODE (XEXP (x, 0)) == MULT)
9811 {
9812 rtx mul_op = XEXP (x, 0);
9813
9814 if (TARGET_DSP_MULTIPLY
9815 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9816 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9817 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9818 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9819 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9820 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9821 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9822 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9823 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9824 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9825 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9826 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9827 == 16))))))
9828 {
9829 /* SMLA[BT][BT]. */
9830 if (speed_p)
9831 *cost += extra_cost->mult[0].extend_add;
9832 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9833 SIGN_EXTEND, 0, speed_p)
9834 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9835 SIGN_EXTEND, 0, speed_p)
9836 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9837 return true;
9838 }
9839
9840 if (speed_p)
9841 *cost += extra_cost->mult[0].add;
9842 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9843 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9844 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9845 return true;
9846 }
9847 if (CONST_INT_P (XEXP (x, 1)))
9848 {
9849 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9850 INTVAL (XEXP (x, 1)), NULL_RTX,
9851 NULL_RTX, 1, 0);
9852 *cost = COSTS_N_INSNS (insns);
9853 if (speed_p)
9854 *cost += insns * extra_cost->alu.arith;
9855 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9856 return true;
9857 }
9858 else if (speed_p)
9859 *cost += extra_cost->alu.arith;
9860
9861 return false;
9862 }
9863
9864 if (mode == DImode)
9865 {
9866 if (arm_arch3m
9867 && GET_CODE (XEXP (x, 0)) == MULT
9868 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9869 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9870 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9871 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9872 {
9873 if (speed_p)
9874 *cost += extra_cost->mult[1].extend_add;
9875 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9876 ZERO_EXTEND, 0, speed_p)
9877 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9878 ZERO_EXTEND, 0, speed_p)
9879 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9880 return true;
9881 }
9882
9883 *cost += COSTS_N_INSNS (1);
9884
9885 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9886 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9887 {
9888 if (speed_p)
9889 *cost += (extra_cost->alu.arith
9890 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9891 ? extra_cost->alu.arith
9892 : extra_cost->alu.arith_shift));
9893
9894 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9895 0, speed_p)
9896 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9897 return true;
9898 }
9899
9900 if (speed_p)
9901 *cost += 2 * extra_cost->alu.arith;
9902 return false;
9903 }
9904
9905 /* Vector mode? */
9906 *cost = LIBCALL_COST (2);
9907 return false;
9908 case IOR:
9909 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9910 {
9911 if (speed_p)
9912 *cost += extra_cost->alu.rev;
9913
9914 return true;
9915 }
9916 /* Fall through. */
9917 case AND: case XOR:
9918 if (mode == SImode)
9919 {
9920 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9921 rtx op0 = XEXP (x, 0);
9922 rtx shift_op, shift_reg;
9923
9924 if (subcode == NOT
9925 && (code == AND
9926 || (code == IOR && TARGET_THUMB2)))
9927 op0 = XEXP (op0, 0);
9928
9929 shift_reg = NULL;
9930 shift_op = shifter_op_p (op0, &shift_reg);
9931 if (shift_op != NULL)
9932 {
9933 if (shift_reg)
9934 {
9935 if (speed_p)
9936 *cost += extra_cost->alu.log_shift_reg;
9937 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9938 }
9939 else if (speed_p)
9940 *cost += extra_cost->alu.log_shift;
9941
9942 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9943 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9944 return true;
9945 }
9946
9947 if (CONST_INT_P (XEXP (x, 1)))
9948 {
9949 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9950 INTVAL (XEXP (x, 1)), NULL_RTX,
9951 NULL_RTX, 1, 0);
9952
9953 *cost = COSTS_N_INSNS (insns);
9954 if (speed_p)
9955 *cost += insns * extra_cost->alu.logical;
9956 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9957 return true;
9958 }
9959
9960 if (speed_p)
9961 *cost += extra_cost->alu.logical;
9962 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9963 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9964 return true;
9965 }
9966
9967 if (mode == DImode)
9968 {
9969 rtx op0 = XEXP (x, 0);
9970 enum rtx_code subcode = GET_CODE (op0);
9971
9972 *cost += COSTS_N_INSNS (1);
9973
9974 if (subcode == NOT
9975 && (code == AND
9976 || (code == IOR && TARGET_THUMB2)))
9977 op0 = XEXP (op0, 0);
9978
9979 if (GET_CODE (op0) == ZERO_EXTEND)
9980 {
9981 if (speed_p)
9982 *cost += 2 * extra_cost->alu.logical;
9983
9984 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9985 0, speed_p)
9986 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9987 return true;
9988 }
9989 else if (GET_CODE (op0) == SIGN_EXTEND)
9990 {
9991 if (speed_p)
9992 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9993
9994 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9995 0, speed_p)
9996 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9997 return true;
9998 }
9999
10000 if (speed_p)
10001 *cost += 2 * extra_cost->alu.logical;
10002
10003 return true;
10004 }
10005 /* Vector mode? */
10006
10007 *cost = LIBCALL_COST (2);
10008 return false;
10009
10010 case MULT:
10011 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10012 && (mode == SFmode || !TARGET_VFP_SINGLE))
10013 {
10014 rtx op0 = XEXP (x, 0);
10015
10016 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10017 op0 = XEXP (op0, 0);
10018
10019 if (speed_p)
10020 *cost += extra_cost->fp[mode != SFmode].mult;
10021
10022 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10023 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10024 return true;
10025 }
10026 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10027 {
10028 *cost = LIBCALL_COST (2);
10029 return false;
10030 }
10031
10032 if (mode == SImode)
10033 {
10034 if (TARGET_DSP_MULTIPLY
10035 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10036 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10037 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10038 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10039 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10040 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10041 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10042 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10043 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10044 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10045 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10046 && (INTVAL (XEXP (XEXP (x, 1), 1))
10047 == 16))))))
10048 {
10049 /* SMUL[TB][TB]. */
10050 if (speed_p)
10051 *cost += extra_cost->mult[0].extend;
10052 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10053 SIGN_EXTEND, 0, speed_p);
10054 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10055 SIGN_EXTEND, 1, speed_p);
10056 return true;
10057 }
10058 if (speed_p)
10059 *cost += extra_cost->mult[0].simple;
10060 return false;
10061 }
10062
10063 if (mode == DImode)
10064 {
10065 if (arm_arch3m
10066 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10067 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10068 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10069 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10070 {
10071 if (speed_p)
10072 *cost += extra_cost->mult[1].extend;
10073 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10074 ZERO_EXTEND, 0, speed_p)
10075 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10076 ZERO_EXTEND, 0, speed_p));
10077 return true;
10078 }
10079
10080 *cost = LIBCALL_COST (2);
10081 return false;
10082 }
10083
10084 /* Vector mode? */
10085 *cost = LIBCALL_COST (2);
10086 return false;
10087
10088 case NEG:
10089 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10090 && (mode == SFmode || !TARGET_VFP_SINGLE))
10091 {
10092 if (GET_CODE (XEXP (x, 0)) == MULT)
10093 {
10094 /* VNMUL. */
10095 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10096 return true;
10097 }
10098
10099 if (speed_p)
10100 *cost += extra_cost->fp[mode != SFmode].neg;
10101
10102 return false;
10103 }
10104 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10105 {
10106 *cost = LIBCALL_COST (1);
10107 return false;
10108 }
10109
10110 if (mode == SImode)
10111 {
10112 if (GET_CODE (XEXP (x, 0)) == ABS)
10113 {
10114 *cost += COSTS_N_INSNS (1);
10115 /* Assume the non-flag-changing variant. */
10116 if (speed_p)
10117 *cost += (extra_cost->alu.log_shift
10118 + extra_cost->alu.arith_shift);
10119 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10120 return true;
10121 }
10122
10123 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10124 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10125 {
10126 *cost += COSTS_N_INSNS (1);
10127 /* No extra cost for MOV imm and MVN imm. */
10128 /* If the comparison op is using the flags, there's no further
10129 cost, otherwise we need to add the cost of the comparison. */
10130 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10131 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10132 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10133 {
10134 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10135 *cost += (COSTS_N_INSNS (1)
10136 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10137 0, speed_p)
10138 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10139 1, speed_p));
10140 if (speed_p)
10141 *cost += extra_cost->alu.arith;
10142 }
10143 return true;
10144 }
10145
10146 if (speed_p)
10147 *cost += extra_cost->alu.arith;
10148 return false;
10149 }
10150
10151 if (GET_MODE_CLASS (mode) == MODE_INT
10152 && GET_MODE_SIZE (mode) < 4)
10153 {
10154 /* Slightly disparage, as we might need an extend operation. */
10155 *cost += 1;
10156 if (speed_p)
10157 *cost += extra_cost->alu.arith;
10158 return false;
10159 }
10160
10161 if (mode == DImode)
10162 {
10163 *cost += COSTS_N_INSNS (1);
10164 if (speed_p)
10165 *cost += 2 * extra_cost->alu.arith;
10166 return false;
10167 }
10168
10169 /* Vector mode? */
10170 *cost = LIBCALL_COST (1);
10171 return false;
10172
10173 case NOT:
10174 if (mode == SImode)
10175 {
10176 rtx shift_op;
10177 rtx shift_reg = NULL;
10178
10179 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10180
10181 if (shift_op)
10182 {
10183 if (shift_reg != NULL)
10184 {
10185 if (speed_p)
10186 *cost += extra_cost->alu.log_shift_reg;
10187 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10188 }
10189 else if (speed_p)
10190 *cost += extra_cost->alu.log_shift;
10191 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10192 return true;
10193 }
10194
10195 if (speed_p)
10196 *cost += extra_cost->alu.logical;
10197 return false;
10198 }
10199 if (mode == DImode)
10200 {
10201 *cost += COSTS_N_INSNS (1);
10202 return false;
10203 }
10204
10205 /* Vector mode? */
10206
10207 *cost += LIBCALL_COST (1);
10208 return false;
10209
10210 case IF_THEN_ELSE:
10211 {
10212 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10213 {
10214 *cost += COSTS_N_INSNS (3);
10215 return true;
10216 }
10217 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10218 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10219
10220 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10221 /* Assume that if one arm of the if_then_else is a register,
10222 it will be tied to the result and eliminate the
10223 conditional insn. */
10224 if (REG_P (XEXP (x, 1)))
10225 *cost += op2cost;
10226 else if (REG_P (XEXP (x, 2)))
10227 *cost += op1cost;
10228 else
10229 {
10230 if (speed_p)
10231 {
10232 if (extra_cost->alu.non_exec_costs_exec)
10233 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10234 else
10235 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10236 }
10237 else
10238 *cost += op1cost + op2cost;
10239 }
10240 }
10241 return true;
10242
10243 case COMPARE:
10244 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10245 *cost = 0;
10246 else
10247 {
10248 machine_mode op0mode;
10249 /* We'll mostly assume that the cost of a compare is the cost of the
10250 LHS. However, there are some notable exceptions. */
10251
10252 /* Floating point compares are never done as side-effects. */
10253 op0mode = GET_MODE (XEXP (x, 0));
10254 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10255 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10256 {
10257 if (speed_p)
10258 *cost += extra_cost->fp[op0mode != SFmode].compare;
10259
10260 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10261 {
10262 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10263 return true;
10264 }
10265
10266 return false;
10267 }
10268 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10269 {
10270 *cost = LIBCALL_COST (2);
10271 return false;
10272 }
10273
10274 /* DImode compares normally take two insns. */
10275 if (op0mode == DImode)
10276 {
10277 *cost += COSTS_N_INSNS (1);
10278 if (speed_p)
10279 *cost += 2 * extra_cost->alu.arith;
10280 return false;
10281 }
10282
10283 if (op0mode == SImode)
10284 {
10285 rtx shift_op;
10286 rtx shift_reg;
10287
10288 if (XEXP (x, 1) == const0_rtx
10289 && !(REG_P (XEXP (x, 0))
10290 || (GET_CODE (XEXP (x, 0)) == SUBREG
10291 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10292 {
10293 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10294
10295 /* Multiply operations that set the flags are often
10296 significantly more expensive. */
10297 if (speed_p
10298 && GET_CODE (XEXP (x, 0)) == MULT
10299 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10300 *cost += extra_cost->mult[0].flag_setting;
10301
10302 if (speed_p
10303 && GET_CODE (XEXP (x, 0)) == PLUS
10304 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10305 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10306 0), 1), mode))
10307 *cost += extra_cost->mult[0].flag_setting;
10308 return true;
10309 }
10310
10311 shift_reg = NULL;
10312 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10313 if (shift_op != NULL)
10314 {
10315 if (shift_reg != NULL)
10316 {
10317 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10318 1, speed_p);
10319 if (speed_p)
10320 *cost += extra_cost->alu.arith_shift_reg;
10321 }
10322 else if (speed_p)
10323 *cost += extra_cost->alu.arith_shift;
10324 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10325 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10326 return true;
10327 }
10328
10329 if (speed_p)
10330 *cost += extra_cost->alu.arith;
10331 if (CONST_INT_P (XEXP (x, 1))
10332 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10333 {
10334 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10335 return true;
10336 }
10337 return false;
10338 }
10339
10340 /* Vector mode? */
10341
10342 *cost = LIBCALL_COST (2);
10343 return false;
10344 }
10345 return true;
10346
10347 case EQ:
10348 case NE:
10349 case LT:
10350 case LE:
10351 case GT:
10352 case GE:
10353 case LTU:
10354 case LEU:
10355 case GEU:
10356 case GTU:
10357 case ORDERED:
10358 case UNORDERED:
10359 case UNEQ:
10360 case UNLE:
10361 case UNLT:
10362 case UNGE:
10363 case UNGT:
10364 case LTGT:
10365 if (outer_code == SET)
10366 {
10367 /* Is it a store-flag operation? */
10368 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10369 && XEXP (x, 1) == const0_rtx)
10370 {
10371 /* Thumb also needs an IT insn. */
10372 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10373 return true;
10374 }
10375 if (XEXP (x, 1) == const0_rtx)
10376 {
10377 switch (code)
10378 {
10379 case LT:
10380 /* LSR Rd, Rn, #31. */
10381 if (speed_p)
10382 *cost += extra_cost->alu.shift;
10383 break;
10384
10385 case EQ:
10386 /* RSBS T1, Rn, #0
10387 ADC Rd, Rn, T1. */
10388
10389 case NE:
10390 /* SUBS T1, Rn, #1
10391 SBC Rd, Rn, T1. */
10392 *cost += COSTS_N_INSNS (1);
10393 break;
10394
10395 case LE:
10396 /* RSBS T1, Rn, Rn, LSR #31
10397 ADC Rd, Rn, T1. */
10398 *cost += COSTS_N_INSNS (1);
10399 if (speed_p)
10400 *cost += extra_cost->alu.arith_shift;
10401 break;
10402
10403 case GT:
10404 /* RSB Rd, Rn, Rn, ASR #1
10405 LSR Rd, Rd, #31. */
10406 *cost += COSTS_N_INSNS (1);
10407 if (speed_p)
10408 *cost += (extra_cost->alu.arith_shift
10409 + extra_cost->alu.shift);
10410 break;
10411
10412 case GE:
10413 /* ASR Rd, Rn, #31
10414 ADD Rd, Rn, #1. */
10415 *cost += COSTS_N_INSNS (1);
10416 if (speed_p)
10417 *cost += extra_cost->alu.shift;
10418 break;
10419
10420 default:
10421 /* Remaining cases are either meaningless or would take
10422 three insns anyway. */
10423 *cost = COSTS_N_INSNS (3);
10424 break;
10425 }
10426 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10427 return true;
10428 }
10429 else
10430 {
10431 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10432 if (CONST_INT_P (XEXP (x, 1))
10433 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10434 {
10435 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10436 return true;
10437 }
10438
10439 return false;
10440 }
10441 }
10442 /* Not directly inside a set. If it involves the condition code
10443 register it must be the condition for a branch, cond_exec or
10444 I_T_E operation. Since the comparison is performed elsewhere
10445 this is just the control part which has no additional
10446 cost. */
10447 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10448 && XEXP (x, 1) == const0_rtx)
10449 {
10450 *cost = 0;
10451 return true;
10452 }
10453 return false;
10454
10455 case ABS:
10456 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10457 && (mode == SFmode || !TARGET_VFP_SINGLE))
10458 {
10459 if (speed_p)
10460 *cost += extra_cost->fp[mode != SFmode].neg;
10461
10462 return false;
10463 }
10464 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10465 {
10466 *cost = LIBCALL_COST (1);
10467 return false;
10468 }
10469
10470 if (mode == SImode)
10471 {
10472 if (speed_p)
10473 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10474 return false;
10475 }
10476 /* Vector mode? */
10477 *cost = LIBCALL_COST (1);
10478 return false;
10479
10480 case SIGN_EXTEND:
10481 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10482 && MEM_P (XEXP (x, 0)))
10483 {
10484 if (mode == DImode)
10485 *cost += COSTS_N_INSNS (1);
10486
10487 if (!speed_p)
10488 return true;
10489
10490 if (GET_MODE (XEXP (x, 0)) == SImode)
10491 *cost += extra_cost->ldst.load;
10492 else
10493 *cost += extra_cost->ldst.load_sign_extend;
10494
10495 if (mode == DImode)
10496 *cost += extra_cost->alu.shift;
10497
10498 return true;
10499 }
10500
10501 /* Widening from less than 32 bits requires an extend operation.  */
10502 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10503 {
10504 /* We have SXTB/SXTH. */
10505 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10506 if (speed_p)
10507 *cost += extra_cost->alu.extend;
10508 }
10509 else if (GET_MODE (XEXP (x, 0)) != SImode)
10510 {
10511 /* Needs two shifts. */
10512 *cost += COSTS_N_INSNS (1);
10513 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10514 if (speed_p)
10515 *cost += 2 * extra_cost->alu.shift;
10516 }
10517
10518 /* Widening beyond 32 bits requires one more insn.  */
10519 if (mode == DImode)
10520 {
10521 *cost += COSTS_N_INSNS (1);
10522 if (speed_p)
10523 *cost += extra_cost->alu.shift;
10524 }
10525
10526 return true;
10527
10528 case ZERO_EXTEND:
10529 if ((arm_arch4
10530 || GET_MODE (XEXP (x, 0)) == SImode
10531 || GET_MODE (XEXP (x, 0)) == QImode)
10532 && MEM_P (XEXP (x, 0)))
10533 {
10534 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10535
10536 if (mode == DImode)
10537 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10538
10539 return true;
10540 }
10541
10542 /* Widening from less than 32 bits requires an extend operation.  */
10543 if (GET_MODE (XEXP (x, 0)) == QImode)
10544 {
10545 /* UXTB can be a shorter instruction in Thumb2, but it might
10546 be slower than the AND Rd, Rn, #255 alternative. When
10547 optimizing for speed it should never be slower to use
10548 AND, and we don't really model 16-bit vs 32-bit insns
10549 here. */
10550 if (speed_p)
10551 *cost += extra_cost->alu.logical;
10552 }
10553 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10554 {
10555 /* We have UXTB/UXTH. */
10556 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10557 if (speed_p)
10558 *cost += extra_cost->alu.extend;
10559 }
10560 else if (GET_MODE (XEXP (x, 0)) != SImode)
10561 {
10562 /* Needs two shifts. It's marginally preferable to use
10563 shifts rather than two BIC instructions as the second
10564 shift may merge with a subsequent insn as a shifter
10565 op. */
10566 *cost = COSTS_N_INSNS (2);
10567 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10568 if (speed_p)
10569 *cost += 2 * extra_cost->alu.shift;
10570 }
10571
10572 /* Widening beyond 32 bits requires one more insn.  */
10573 if (mode == DImode)
10574 {
10575 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10576 }
10577
10578 return true;
10579
10580 case CONST_INT:
10581 *cost = 0;
10582 /* CONST_INT has no mode, so we cannot tell for sure how many
10583 insns are really going to be needed. The best we can do is
10584 look at the value passed. If it fits in SImode, then assume
10585 that's the mode it will be used for. Otherwise assume it
10586 will be used in DImode. */
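      /* For example (illustrative), (const_int 0x100000001) does not fit
	 in SImode, so it is costed below as the two SImode constants 1
	 (low half) and 1 (high half).  */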
10587 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10588 mode = SImode;
10589 else
10590 mode = DImode;
10591
10592 /* Avoid blowing up in arm_gen_constant (). */
10593 if (!(outer_code == PLUS
10594 || outer_code == AND
10595 || outer_code == IOR
10596 || outer_code == XOR
10597 || outer_code == MINUS))
10598 outer_code = SET;
10599
10600 const_int_cost:
10601 if (mode == SImode)
10602 {
10603 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10604 INTVAL (x), NULL, NULL,
10605 0, 0));
10606 /* Extra costs? */
10607 }
10608 else
10609 {
10610 *cost += COSTS_N_INSNS (arm_gen_constant
10611 (outer_code, SImode, NULL,
10612 trunc_int_for_mode (INTVAL (x), SImode),
10613 NULL, NULL, 0, 0)
10614 + arm_gen_constant (outer_code, SImode, NULL,
10615 INTVAL (x) >> 32, NULL,
10616 NULL, 0, 0));
10617 /* Extra costs? */
10618 }
10619
10620 return true;
10621
10622 case CONST:
10623 case LABEL_REF:
10624 case SYMBOL_REF:
10625 if (speed_p)
10626 {
10627 if (arm_arch_thumb2 && !flag_pic)
10628 *cost += COSTS_N_INSNS (1);
10629 else
10630 *cost += extra_cost->ldst.load;
10631 }
10632 else
10633 *cost += COSTS_N_INSNS (1);
10634
10635 if (flag_pic)
10636 {
10637 *cost += COSTS_N_INSNS (1);
10638 if (speed_p)
10639 *cost += extra_cost->alu.arith;
10640 }
10641
10642 return true;
10643
10644 case CONST_FIXED:
10645 *cost = COSTS_N_INSNS (4);
10646 /* Fixme. */
10647 return true;
10648
10649 case CONST_DOUBLE:
10650 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10651 && (mode == SFmode || !TARGET_VFP_SINGLE))
10652 {
10653 if (vfp3_const_double_rtx (x))
10654 {
10655 if (speed_p)
10656 *cost += extra_cost->fp[mode == DFmode].fpconst;
10657 return true;
10658 }
10659
10660 if (speed_p)
10661 {
10662 if (mode == DFmode)
10663 *cost += extra_cost->ldst.loadd;
10664 else
10665 *cost += extra_cost->ldst.loadf;
10666 }
10667 else
10668 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10669
10670 return true;
10671 }
10672 *cost = COSTS_N_INSNS (4);
10673 return true;
10674
10675 case CONST_VECTOR:
10676 /* Fixme. */
10677 if (TARGET_NEON
10678 && TARGET_HARD_FLOAT
10679 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10680 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10681 *cost = COSTS_N_INSNS (1);
10682 else
10683 *cost = COSTS_N_INSNS (4);
10684 return true;
10685
10686 case HIGH:
10687 case LO_SUM:
10688 /* When optimizing for size, we prefer constant pool entries to
10689 MOVW/MOVT pairs, so bump the cost of these slightly. */
10690 if (!speed_p)
10691 *cost += 1;
10692 return true;
10693
10694 case CLZ:
10695 if (speed_p)
10696 *cost += extra_cost->alu.clz;
10697 return false;
10698
10699 case SMIN:
10700 if (XEXP (x, 1) == const0_rtx)
10701 {
10702 if (speed_p)
10703 *cost += extra_cost->alu.log_shift;
10704 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10705 return true;
10706 }
10707 /* Fall through. */
10708 case SMAX:
10709 case UMIN:
10710 case UMAX:
10711 *cost += COSTS_N_INSNS (1);
10712 return false;
10713
10714 case TRUNCATE:
10715 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10716 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10717 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10718 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10719 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10720 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10721 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10722 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10723 == ZERO_EXTEND))))
10724 {
10725 if (speed_p)
10726 *cost += extra_cost->mult[1].extend;
10727 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10728 ZERO_EXTEND, 0, speed_p)
10729 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10730 ZERO_EXTEND, 0, speed_p));
10731 return true;
10732 }
10733 *cost = LIBCALL_COST (1);
10734 return false;
10735
10736 case UNSPEC_VOLATILE:
10737 case UNSPEC:
10738 return arm_unspec_cost (x, outer_code, speed_p, cost);
10739
10740 case PC:
10741 /* Reading the PC is like reading any other register. Writing it
10742 is more expensive, but we take that into account elsewhere. */
10743 *cost = 0;
10744 return true;
10745
10746 case ZERO_EXTRACT:
10747 /* TODO: Simple zero_extract of bottom bits using AND. */
10748 /* Fall through. */
10749 case SIGN_EXTRACT:
10750 if (arm_arch6
10751 && mode == SImode
10752 && CONST_INT_P (XEXP (x, 1))
10753 && CONST_INT_P (XEXP (x, 2)))
10754 {
10755 if (speed_p)
10756 *cost += extra_cost->alu.bfx;
10757 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10758 return true;
10759 }
10760 /* Without UBFX/SBFX, need to resort to shift operations. */
10761 *cost += COSTS_N_INSNS (1);
10762 if (speed_p)
10763 *cost += 2 * extra_cost->alu.shift;
10764 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10765 return true;
10766
10767 case FLOAT_EXTEND:
10768 if (TARGET_HARD_FLOAT)
10769 {
10770 if (speed_p)
10771 *cost += extra_cost->fp[mode == DFmode].widen;
10772 if (!TARGET_VFP5
10773 && GET_MODE (XEXP (x, 0)) == HFmode)
10774 {
10775 /* Pre v8, widening HF->DF is a two-step process, first
10776 widening to SFmode. */
10777 *cost += COSTS_N_INSNS (1);
10778 if (speed_p)
10779 *cost += extra_cost->fp[0].widen;
10780 }
10781 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10782 return true;
10783 }
10784
10785 *cost = LIBCALL_COST (1);
10786 return false;
10787
10788 case FLOAT_TRUNCATE:
10789 if (TARGET_HARD_FLOAT)
10790 {
10791 if (speed_p)
10792 *cost += extra_cost->fp[mode == DFmode].narrow;
10793 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10794 return true;
10795 /* Vector modes? */
10796 }
10797 *cost = LIBCALL_COST (1);
10798 return false;
10799
10800 case FMA:
10801 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10802 {
10803 rtx op0 = XEXP (x, 0);
10804 rtx op1 = XEXP (x, 1);
10805 rtx op2 = XEXP (x, 2);
10806
10807
10808 /* vfms or vfnma. */
10809 if (GET_CODE (op0) == NEG)
10810 op0 = XEXP (op0, 0);
10811
10812 /* vfnms or vfnma. */
10813 if (GET_CODE (op2) == NEG)
10814 op2 = XEXP (op2, 0);
10815
10816 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10817 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10818 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10819
10820 if (speed_p)
10821 	    *cost += extra_cost->fp[mode == DFmode].fma;
10822
10823 return true;
10824 }
10825
10826 *cost = LIBCALL_COST (3);
10827 return false;
10828
10829 case FIX:
10830 case UNSIGNED_FIX:
10831 if (TARGET_HARD_FLOAT)
10832 {
10833 	  /* The *combine_vcvtf2i pattern reduces a vmul+vcvt into
10834 	     a single vcvt fixed-point conversion.  */
10835 if (code == FIX && mode == SImode
10836 && GET_CODE (XEXP (x, 0)) == FIX
10837 && GET_MODE (XEXP (x, 0)) == SFmode
10838 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10839 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10840 > 0)
10841 {
10842 if (speed_p)
10843 *cost += extra_cost->fp[0].toint;
10844
10845 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10846 code, 0, speed_p);
10847 return true;
10848 }
10849
10850 if (GET_MODE_CLASS (mode) == MODE_INT)
10851 {
10852 mode = GET_MODE (XEXP (x, 0));
10853 if (speed_p)
10854 *cost += extra_cost->fp[mode == DFmode].toint;
10855 	  /* Strip off the 'cost' of rounding towards zero.  */
10856 if (GET_CODE (XEXP (x, 0)) == FIX)
10857 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10858 0, speed_p);
10859 else
10860 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10861 /* ??? Increase the cost to deal with transferring from
10862 FP -> CORE registers? */
10863 return true;
10864 }
10865 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10866 && TARGET_VFP5)
10867 {
10868 if (speed_p)
10869 *cost += extra_cost->fp[mode == DFmode].roundint;
10870 return false;
10871 }
10872 /* Vector costs? */
10873 }
10874 *cost = LIBCALL_COST (1);
10875 return false;
10876
10877 case FLOAT:
10878 case UNSIGNED_FLOAT:
10879 if (TARGET_HARD_FLOAT)
10880 {
10881 /* ??? Increase the cost to deal with transferring from CORE
10882 -> FP registers? */
10883 if (speed_p)
10884 *cost += extra_cost->fp[mode == DFmode].fromint;
10885 return false;
10886 }
10887 *cost = LIBCALL_COST (1);
10888 return false;
10889
10890 case CALL:
10891 return true;
10892
10893 case ASM_OPERANDS:
10894 {
10895 	/* Just a rough guess: the number of instructions in the asm
10896 	   plus one insn per input, but always a minimum of
10897 	   COSTS_N_INSNS (1) (see PR60663).  */
10898 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10899 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10900
10901 *cost = COSTS_N_INSNS (asm_length + num_operands);
10902 return true;
10903 }
10904 default:
10905 if (mode != VOIDmode)
10906 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10907 else
10908 *cost = COSTS_N_INSNS (4); /* Who knows? */
10909 return false;
10910 }
10911 }
10912
10913 #undef HANDLE_NARROW_SHIFT_ARITH
10914
10915 /* RTX costs entry point. */
10916
10917 static bool
10918 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10919 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10920 {
10921 bool result;
10922 int code = GET_CODE (x);
10923 gcc_assert (current_tune->insn_extra_cost);
10924
10925 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10926 (enum rtx_code) outer_code,
10927 current_tune->insn_extra_cost,
10928 total, speed);
10929
10930 if (dump_file && (dump_flags & TDF_DETAILS))
10931 {
10932 print_rtl_single (dump_file, x);
10933 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10934 *total, result ? "final" : "partial");
10935 }
10936 return result;
10937 }
10938
10939 /* All address computations that can be done are free, but rtx cost returns
10940 the same for practically all of them. So we weight the different types
10941    of address here in the order (most preferred first):
10942 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10943 static inline int
10944 arm_arm_address_cost (rtx x)
10945 {
10946 enum rtx_code c = GET_CODE (x);
10947
10948 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10949 return 0;
10950 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10951 return 10;
10952
10953 if (c == PLUS)
10954 {
10955 if (CONST_INT_P (XEXP (x, 1)))
10956 return 2;
10957
10958 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10959 return 3;
10960
10961 return 4;
10962 }
10963
10964 return 6;
10965 }
10966
10967 static inline int
10968 arm_thumb_address_cost (rtx x)
10969 {
10970 enum rtx_code c = GET_CODE (x);
10971
10972 if (c == REG)
10973 return 1;
10974 if (c == PLUS
10975 && REG_P (XEXP (x, 0))
10976 && CONST_INT_P (XEXP (x, 1)))
10977 return 1;
10978
10979 return 2;
10980 }
10981
10982 static int
10983 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10984 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10985 {
10986 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10987 }
10988
10989 /* Adjust cost hook for XScale. */
10990 static bool
10991 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10992 int * cost)
10993 {
10994 /* Some true dependencies can have a higher cost depending
10995 on precisely how certain input operands are used. */
10996 if (dep_type == 0
10997 && recog_memoized (insn) >= 0
10998 && recog_memoized (dep) >= 0)
10999 {
11000 int shift_opnum = get_attr_shift (insn);
11001 enum attr_type attr_type = get_attr_type (dep);
11002
11003 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11004 operand for INSN. If we have a shifted input operand and the
11005 instruction we depend on is another ALU instruction, then we may
11006 have to account for an additional stall. */
11007 if (shift_opnum != 0
11008 && (attr_type == TYPE_ALU_SHIFT_IMM
11009 || attr_type == TYPE_ALUS_SHIFT_IMM
11010 || attr_type == TYPE_LOGIC_SHIFT_IMM
11011 || attr_type == TYPE_LOGICS_SHIFT_IMM
11012 || attr_type == TYPE_ALU_SHIFT_REG
11013 || attr_type == TYPE_ALUS_SHIFT_REG
11014 || attr_type == TYPE_LOGIC_SHIFT_REG
11015 || attr_type == TYPE_LOGICS_SHIFT_REG
11016 || attr_type == TYPE_MOV_SHIFT
11017 || attr_type == TYPE_MVN_SHIFT
11018 || attr_type == TYPE_MOV_SHIFT_REG
11019 || attr_type == TYPE_MVN_SHIFT_REG))
11020 {
11021 rtx shifted_operand;
11022 int opno;
11023
11024 /* Get the shifted operand. */
11025 extract_insn (insn);
11026 shifted_operand = recog_data.operand[shift_opnum];
11027
11028 /* Iterate over all the operands in DEP. If we write an operand
11029 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11030 cost of this dependency. */
11031 extract_insn (dep);
11032 preprocess_constraints (dep);
11033 for (opno = 0; opno < recog_data.n_operands; opno++)
11034 {
11035 /* We can ignore strict inputs. */
11036 if (recog_data.operand_type[opno] == OP_IN)
11037 continue;
11038
11039 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11040 shifted_operand))
11041 {
11042 *cost = 2;
11043 return false;
11044 }
11045 }
11046 }
11047 }
11048 return true;
11049 }
11050
11051 /* Adjust cost hook for Cortex A9. */
11052 static bool
11053 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11054 int * cost)
11055 {
11056 switch (dep_type)
11057 {
11058 case REG_DEP_ANTI:
11059 *cost = 0;
11060 return false;
11061
11062 case REG_DEP_TRUE:
11063 case REG_DEP_OUTPUT:
11064 if (recog_memoized (insn) >= 0
11065 && recog_memoized (dep) >= 0)
11066 {
11067 if (GET_CODE (PATTERN (insn)) == SET)
11068 {
11069 if (GET_MODE_CLASS
11070 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11071 || GET_MODE_CLASS
11072 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11073 {
11074 enum attr_type attr_type_insn = get_attr_type (insn);
11075 enum attr_type attr_type_dep = get_attr_type (dep);
11076
11077 /* By default all dependencies of the form
11078 s0 = s0 <op> s1
11079 s0 = s0 <op> s2
11080 have an extra latency of 1 cycle because
11081 of the input and output dependency in this
11082 		     case.  However this gets modeled as a true
11083 dependency and hence all these checks. */
11084 if (REG_P (SET_DEST (PATTERN (insn)))
11085 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11086 {
11087 /* FMACS is a special case where the dependent
11088 instruction can be issued 3 cycles before
11089 the normal latency in case of an output
11090 dependency. */
11091 if ((attr_type_insn == TYPE_FMACS
11092 || attr_type_insn == TYPE_FMACD)
11093 && (attr_type_dep == TYPE_FMACS
11094 || attr_type_dep == TYPE_FMACD))
11095 {
11096 if (dep_type == REG_DEP_OUTPUT)
11097 *cost = insn_default_latency (dep) - 3;
11098 else
11099 *cost = insn_default_latency (dep);
11100 return false;
11101 }
11102 else
11103 {
11104 if (dep_type == REG_DEP_OUTPUT)
11105 *cost = insn_default_latency (dep) + 1;
11106 else
11107 *cost = insn_default_latency (dep);
11108 }
11109 return false;
11110 }
11111 }
11112 }
11113 }
11114 break;
11115
11116 default:
11117 gcc_unreachable ();
11118 }
11119
11120 return true;
11121 }
11122
11123 /* Adjust cost hook for FA726TE. */
11124 static bool
11125 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11126 int * cost)
11127 {
11128   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11129      followed by a predicated one) has a penalty of 3.  */
11130 if (dep_type == REG_DEP_TRUE
11131 && recog_memoized (insn) >= 0
11132 && recog_memoized (dep) >= 0
11133 && get_attr_conds (dep) == CONDS_SET)
11134 {
11135 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11136 if (get_attr_conds (insn) == CONDS_USE
11137 && get_attr_type (insn) != TYPE_BRANCH)
11138 {
11139 *cost = 3;
11140 return false;
11141 }
11142
11143 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11144 || get_attr_conds (insn) == CONDS_USE)
11145 {
11146 *cost = 0;
11147 return false;
11148 }
11149 }
11150
11151 return true;
11152 }
11153
11154 /* Implement TARGET_REGISTER_MOVE_COST.
11155
11156    Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11157    such a move is typically more expensive than a single memory access.  We set
11158 the cost to less than two memory accesses so that floating
11159 point to integer conversion does not go through memory. */
11160
11161 int
11162 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11163 reg_class_t from, reg_class_t to)
11164 {
11165 if (TARGET_32BIT)
11166 {
11167 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11168 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11169 return 15;
11170 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11171 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11172 return 4;
11173 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11174 return 20;
11175 else
11176 return 2;
11177 }
11178 else
11179 {
11180 if (from == HI_REGS || to == HI_REGS)
11181 return 4;
11182 else
11183 return 2;
11184 }
11185 }
11186
11187 /* Implement TARGET_MEMORY_MOVE_COST. */
11188
11189 int
11190 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11191 bool in ATTRIBUTE_UNUSED)
11192 {
11193 if (TARGET_32BIT)
11194 return 10;
11195 else
11196 {
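      /* A rough Thumb-1 heuristic (descriptive note, not from the original
	 comments): sub-word modes get a flat cost of 8; otherwise about two
	 cost units per byte moved, doubled for classes other than LO_REGS.
	 These are abstract cost units, not cycle counts.  */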
11197 if (GET_MODE_SIZE (mode) < 4)
11198 return 8;
11199 else
11200 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11201 }
11202 }
11203
11204 /* Vectorizer cost model implementation. */
11205
11206 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11207 static int
11208 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11209 tree vectype,
11210 int misalign ATTRIBUTE_UNUSED)
11211 {
11212 unsigned elements;
11213
11214 switch (type_of_cost)
11215 {
11216 case scalar_stmt:
11217 return current_tune->vec_costs->scalar_stmt_cost;
11218
11219 case scalar_load:
11220 return current_tune->vec_costs->scalar_load_cost;
11221
11222 case scalar_store:
11223 return current_tune->vec_costs->scalar_store_cost;
11224
11225 case vector_stmt:
11226 return current_tune->vec_costs->vec_stmt_cost;
11227
11228 case vector_load:
11229 return current_tune->vec_costs->vec_align_load_cost;
11230
11231 case vector_store:
11232 return current_tune->vec_costs->vec_store_cost;
11233
11234 case vec_to_scalar:
11235 return current_tune->vec_costs->vec_to_scalar_cost;
11236
11237 case scalar_to_vec:
11238 return current_tune->vec_costs->scalar_to_vec_cost;
11239
11240 case unaligned_load:
11241 return current_tune->vec_costs->vec_unalign_load_cost;
11242
11243 case unaligned_store:
11244 return current_tune->vec_costs->vec_unalign_store_cost;
11245
11246 case cond_branch_taken:
11247 return current_tune->vec_costs->cond_taken_branch_cost;
11248
11249 case cond_branch_not_taken:
11250 return current_tune->vec_costs->cond_not_taken_branch_cost;
11251
11252 case vec_perm:
11253 case vec_promote_demote:
11254 return current_tune->vec_costs->vec_stmt_cost;
11255
11256 case vec_construct:
11257 elements = TYPE_VECTOR_SUBPARTS (vectype);
11258 return elements / 2 + 1;
11259
11260 default:
11261 gcc_unreachable ();
11262 }
11263 }
11264
11265 /* Implement targetm.vectorize.add_stmt_cost. */
11266
11267 static unsigned
11268 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11269 struct _stmt_vec_info *stmt_info, int misalign,
11270 enum vect_cost_model_location where)
11271 {
11272 unsigned *cost = (unsigned *) data;
11273 unsigned retval = 0;
11274
11275 if (flag_vect_cost_model)
11276 {
11277 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11278 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11279
11280 /* Statements in an inner loop relative to the loop being
11281 vectorized are weighted more heavily. The value here is
11282 arbitrary and could potentially be improved with analysis. */
11283 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11284 count *= 50; /* FIXME. */
11285
11286 retval = (unsigned) (count * stmt_cost);
11287 cost[where] += retval;
11288 }
11289
11290 return retval;
11291 }
11292
11293 /* Return true if and only if this insn can dual-issue only as older. */
11294 static bool
11295 cortexa7_older_only (rtx_insn *insn)
11296 {
11297 if (recog_memoized (insn) < 0)
11298 return false;
11299
11300 switch (get_attr_type (insn))
11301 {
11302 case TYPE_ALU_DSP_REG:
11303 case TYPE_ALU_SREG:
11304 case TYPE_ALUS_SREG:
11305 case TYPE_LOGIC_REG:
11306 case TYPE_LOGICS_REG:
11307 case TYPE_ADC_REG:
11308 case TYPE_ADCS_REG:
11309 case TYPE_ADR:
11310 case TYPE_BFM:
11311 case TYPE_REV:
11312 case TYPE_MVN_REG:
11313 case TYPE_SHIFT_IMM:
11314 case TYPE_SHIFT_REG:
11315 case TYPE_LOAD_BYTE:
11316 case TYPE_LOAD_4:
11317 case TYPE_STORE_4:
11318 case TYPE_FFARITHS:
11319 case TYPE_FADDS:
11320 case TYPE_FFARITHD:
11321 case TYPE_FADDD:
11322 case TYPE_FMOV:
11323 case TYPE_F_CVT:
11324 case TYPE_FCMPS:
11325 case TYPE_FCMPD:
11326 case TYPE_FCONSTS:
11327 case TYPE_FCONSTD:
11328 case TYPE_FMULS:
11329 case TYPE_FMACS:
11330 case TYPE_FMULD:
11331 case TYPE_FMACD:
11332 case TYPE_FDIVS:
11333 case TYPE_FDIVD:
11334 case TYPE_F_MRC:
11335 case TYPE_F_MRRC:
11336 case TYPE_F_FLAG:
11337 case TYPE_F_LOADS:
11338 case TYPE_F_STORES:
11339 return true;
11340 default:
11341 return false;
11342 }
11343 }
11344
11345 /* Return true if and only if this insn can dual-issue as younger. */
11346 static bool
11347 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11348 {
11349 if (recog_memoized (insn) < 0)
11350 {
11351 if (verbose > 5)
11352 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11353 return false;
11354 }
11355
11356 switch (get_attr_type (insn))
11357 {
11358 case TYPE_ALU_IMM:
11359 case TYPE_ALUS_IMM:
11360 case TYPE_LOGIC_IMM:
11361 case TYPE_LOGICS_IMM:
11362 case TYPE_EXTEND:
11363 case TYPE_MVN_IMM:
11364 case TYPE_MOV_IMM:
11365 case TYPE_MOV_REG:
11366 case TYPE_MOV_SHIFT:
11367 case TYPE_MOV_SHIFT_REG:
11368 case TYPE_BRANCH:
11369 case TYPE_CALL:
11370 return true;
11371 default:
11372 return false;
11373 }
11374 }
11375
11376
11377 /* Look for an instruction that can dual issue only as an older
11378 instruction, and move it in front of any instructions that can
11379 dual-issue as younger, while preserving the relative order of all
11380    other instructions in the ready list.  This is a heuristic to help
11381 dual-issue in later cycles, by postponing issue of more flexible
11382 instructions. This heuristic may affect dual issue opportunities
11383 in the current cycle. */
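/* As a worked illustration of this heuristic (the head of the ready list,
   i.e. the insn to issue first, sits at the highest index): a ready list
   { A, OLD, Y1, Y2 }, where OLD can dual-issue only as older and Y1/Y2 can
   issue as younger, is rearranged to { A, Y1, Y2, OLD }, so OLD issues
   ahead of the younger insns while Y1 and Y2 keep their relative order.  */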
11384 static void
11385 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11386 int *n_readyp, int clock)
11387 {
11388 int i;
11389 int first_older_only = -1, first_younger = -1;
11390
11391 if (verbose > 5)
11392 fprintf (file,
11393 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11394 clock,
11395 *n_readyp);
11396
11397 /* Traverse the ready list from the head (the instruction to issue
11398      first), looking for the first instruction that can issue as
11399 younger and the first instruction that can dual-issue only as
11400 older. */
11401 for (i = *n_readyp - 1; i >= 0; i--)
11402 {
11403 rtx_insn *insn = ready[i];
11404 if (cortexa7_older_only (insn))
11405 {
11406 first_older_only = i;
11407 if (verbose > 5)
11408 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11409 break;
11410 }
11411 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11412 first_younger = i;
11413 }
11414
11415 /* Nothing to reorder because either no younger insn found or insn
11416 that can dual-issue only as older appears before any insn that
11417 can dual-issue as younger. */
11418 if (first_younger == -1)
11419 {
11420 if (verbose > 5)
11421 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11422 return;
11423 }
11424
11425 /* Nothing to reorder because no older-only insn in the ready list. */
11426 if (first_older_only == -1)
11427 {
11428 if (verbose > 5)
11429 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11430 return;
11431 }
11432
11433 /* Move first_older_only insn before first_younger. */
11434 if (verbose > 5)
11435 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11436 INSN_UID(ready [first_older_only]),
11437 INSN_UID(ready [first_younger]));
11438 rtx_insn *first_older_only_insn = ready [first_older_only];
11439 for (i = first_older_only; i < first_younger; i++)
11440 {
11441 ready[i] = ready[i+1];
11442 }
11443
11444 ready[i] = first_older_only_insn;
11445 return;
11446 }
11447
11448 /* Implement TARGET_SCHED_REORDER. */
11449 static int
11450 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11451 int clock)
11452 {
11453 switch (arm_tune)
11454 {
11455 case TARGET_CPU_cortexa7:
11456 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11457 break;
11458 default:
11459 /* Do nothing for other cores. */
11460 break;
11461 }
11462
11463 return arm_issue_rate ();
11464 }
11465
11466 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11467    It corrects the value of COST based on the dependence of type DEP_TYPE
11468    between INSN and DEP.  It returns the new
11469 value. There is a per-core adjust_cost hook to adjust scheduler costs
11470 and the per-core hook can choose to completely override the generic
11471 adjust_cost function. Only put bits of code into arm_adjust_cost that
11472 are common across all cores. */
11473 static int
11474 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11475 unsigned int)
11476 {
11477 rtx i_pat, d_pat;
11478
11479 /* When generating Thumb-1 code, we want to place flag-setting operations
11480 close to a conditional branch which depends on them, so that we can
11481 omit the comparison. */
11482 if (TARGET_THUMB1
11483 && dep_type == 0
11484 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11485 && recog_memoized (dep) >= 0
11486 && get_attr_conds (dep) == CONDS_SET)
11487 return 0;
11488
11489 if (current_tune->sched_adjust_cost != NULL)
11490 {
11491 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11492 return cost;
11493 }
11494
11495 /* XXX Is this strictly true? */
11496 if (dep_type == REG_DEP_ANTI
11497 || dep_type == REG_DEP_OUTPUT)
11498 return 0;
11499
11500 /* Call insns don't incur a stall, even if they follow a load. */
11501 if (dep_type == 0
11502 && CALL_P (insn))
11503 return 1;
11504
11505 if ((i_pat = single_set (insn)) != NULL
11506 && MEM_P (SET_SRC (i_pat))
11507 && (d_pat = single_set (dep)) != NULL
11508 && MEM_P (SET_DEST (d_pat)))
11509 {
11510 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11511       /* This is a load after a store; there is no conflict if the load reads
11512 	 from a cached area.  Assume that loads from the stack and from the
11513 	 constant pool are cached, and that others will miss.  This is a
11514 	 hack.  */
11515
11516 if ((GET_CODE (src_mem) == SYMBOL_REF
11517 && CONSTANT_POOL_ADDRESS_P (src_mem))
11518 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11519 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11520 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11521 return 1;
11522 }
11523
11524 return cost;
11525 }
11526
11527 int
11528 arm_max_conditional_execute (void)
11529 {
11530 return max_insns_skipped;
11531 }
11532
11533 static int
11534 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11535 {
11536 if (TARGET_32BIT)
11537 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11538 else
11539 return (optimize > 0) ? 2 : 0;
11540 }
11541
11542 static int
11543 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11544 {
11545 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11546 }
11547
11548 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11549 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11550 sequences of non-executed instructions in IT blocks probably take the same
11551 amount of time as executed instructions (and the IT instruction itself takes
11552 space in icache). This function was experimentally determined to give good
11553 results on a popular embedded benchmark. */
11554
11555 static int
11556 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11557 {
11558 return (TARGET_32BIT && speed_p) ? 1
11559 : arm_default_branch_cost (speed_p, predictable_p);
11560 }
11561
11562 static int
11563 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11564 {
11565 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11566 }
11567
11568 static bool fp_consts_inited = false;
11569
11570 static REAL_VALUE_TYPE value_fp0;
11571
11572 static void
11573 init_fp_table (void)
11574 {
11575 REAL_VALUE_TYPE r;
11576
11577 r = REAL_VALUE_ATOF ("0", DFmode);
11578 value_fp0 = r;
11579 fp_consts_inited = true;
11580 }
11581
11582 /* Return TRUE if rtx X is a valid immediate FP constant. */
11583 int
11584 arm_const_double_rtx (rtx x)
11585 {
11586 const REAL_VALUE_TYPE *r;
11587
11588 if (!fp_consts_inited)
11589 init_fp_table ();
11590
11591 r = CONST_DOUBLE_REAL_VALUE (x);
11592 if (REAL_VALUE_MINUS_ZERO (*r))
11593 return 0;
11594
11595 if (real_equal (r, &value_fp0))
11596 return 1;
11597
11598 return 0;
11599 }
11600
11601 /* VFPv3 has a fairly wide range of representable immediates, formed from
11602 "quarter-precision" floating-point values. These can be evaluated using this
11603 formula (with ^ for exponentiation):
11604
11605      (-1)^s * n * 2^(-r)
11606
11607 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11608 16 <= n <= 31 and 0 <= r <= 7.
11609
11610 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11611
11612 - A (most-significant) is the sign bit.
11613 - BCD are the exponent (encoded as r XOR 3).
11614 - EFGH are the mantissa (encoded as n - 16).
11615 */
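/* A worked example of the encoding above (for illustration only): 1.0 can
   be written as 16 * 2^(-4), i.e. s = 0, n = 16, r = 4, which encodes as
   A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000, giving the index
   0x70.  vfp3_const_double_index below computes the same value.  */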
11616
11617 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11618 fconst[sd] instruction, or -1 if X isn't suitable. */
11619 static int
11620 vfp3_const_double_index (rtx x)
11621 {
11622 REAL_VALUE_TYPE r, m;
11623 int sign, exponent;
11624 unsigned HOST_WIDE_INT mantissa, mant_hi;
11625 unsigned HOST_WIDE_INT mask;
11626 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11627 bool fail;
11628
11629 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11630 return -1;
11631
11632 r = *CONST_DOUBLE_REAL_VALUE (x);
11633
11634 /* We can't represent these things, so detect them first. */
11635 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11636 return -1;
11637
11638 /* Extract sign, exponent and mantissa. */
11639 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11640 r = real_value_abs (&r);
11641 exponent = REAL_EXP (&r);
11642 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11643 highest (sign) bit, with a fixed binary point at bit point_pos.
11644 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11645 bits for the mantissa, this may fail (low bits would be lost). */
11646 real_ldexp (&m, &r, point_pos - exponent);
11647 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11648 mantissa = w.elt (0);
11649 mant_hi = w.elt (1);
11650
11651 /* If there are bits set in the low part of the mantissa, we can't
11652 represent this value. */
11653 if (mantissa != 0)
11654 return -1;
11655
11656 /* Now make it so that mantissa contains the most-significant bits, and move
11657 the point_pos to indicate that the least-significant bits have been
11658 discarded. */
11659 point_pos -= HOST_BITS_PER_WIDE_INT;
11660 mantissa = mant_hi;
11661
11662 /* We can permit four significant bits of mantissa only, plus a high bit
11663 which is always 1. */
11664 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11665 if ((mantissa & mask) != 0)
11666 return -1;
11667
11668 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11669 mantissa >>= point_pos - 5;
11670
11671 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11672 floating-point immediate zero with Neon using an integer-zero load, but
11673 that case is handled elsewhere.) */
11674 if (mantissa == 0)
11675 return -1;
11676
11677 gcc_assert (mantissa >= 16 && mantissa <= 31);
11678
11679 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11680 normalized significands are in the range [1, 2). (Our mantissa is shifted
11681 left 4 places at this point relative to normalized IEEE754 values). GCC
11682 internally uses [0.5, 1) (see real.c), so the exponent returned from
11683 REAL_EXP must be altered. */
11684 exponent = 5 - exponent;
11685
11686 if (exponent < 0 || exponent > 7)
11687 return -1;
11688
11689 /* Sign, mantissa and exponent are now in the correct form to plug into the
11690 formula described in the comment above. */
11691 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11692 }
11693
11694 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11695 int
11696 vfp3_const_double_rtx (rtx x)
11697 {
11698 if (!TARGET_VFP3)
11699 return 0;
11700
11701 return vfp3_const_double_index (x) != -1;
11702 }
11703
11704 /* Recognize immediates which can be used in various Neon instructions. Legal
11705 immediates are described by the following table (for VMVN variants, the
11706 bitwise inverse of the constant shown is recognized. In either case, VMOV
11707 is output and the correct instruction to use for a given constant is chosen
11708 by the assembler). The constant shown is replicated across all elements of
11709 the destination vector.
11710
11711 insn elems variant constant (binary)
11712 ---- ----- ------- -----------------
11713 vmov i32 0 00000000 00000000 00000000 abcdefgh
11714 vmov i32 1 00000000 00000000 abcdefgh 00000000
11715 vmov i32 2 00000000 abcdefgh 00000000 00000000
11716 vmov i32 3 abcdefgh 00000000 00000000 00000000
11717 vmov i16 4 00000000 abcdefgh
11718 vmov i16 5 abcdefgh 00000000
11719 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11720 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11721 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11722 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11723 vmvn i16 10 00000000 abcdefgh
11724 vmvn i16 11 abcdefgh 00000000
11725 vmov i32 12 00000000 00000000 abcdefgh 11111111
11726 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11727 vmov i32 14 00000000 abcdefgh 11111111 11111111
11728 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11729 vmov i8 16 abcdefgh
11730 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11731 eeeeeeee ffffffff gggggggg hhhhhhhh
11732 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11733 vmov f32 19 00000000 00000000 00000000 00000000
11734
11735 For case 18, B = !b. Representable values are exactly those accepted by
11736 vfp3_const_double_index, but are output as floating-point numbers rather
11737 than indices.
11738
11739 For case 19, we will change it to vmov.i32 when assembling.
11740
11741 Variants 0-5 (inclusive) may also be used as immediates for the second
11742 operand of VORR/VBIC instructions.
11743
11744 The INVERSE argument causes the bitwise inverse of the given operand to be
11745 recognized instead (used for recognizing legal immediates for the VAND/VORN
11746 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11747 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11748 output, rather than the real insns vbic/vorr).
11749
11750 INVERSE makes no difference to the recognition of float vectors.
11751
11752 The return value is the variant of immediate as shown in the above table, or
11753 -1 if the given value doesn't match any of the listed patterns.
11754 */
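/* For example (an informal illustration of the table above): a V4HI vector
   with every element equal to 0x00ab matches variant 4 ("vmov i16"), so the
   function returns 4 with *ELEMENTWIDTH = 16 and *MODCONST = 0xab, while a
   vector with every element equal to 0xff54 (the bitwise inverse of 0x00ab)
   is recognized as variant 10 ("vmvn i16").  */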
11755 static int
11756 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11757 rtx *modconst, int *elementwidth)
11758 {
11759 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11760 matches = 1; \
11761 for (i = 0; i < idx; i += (STRIDE)) \
11762 if (!(TEST)) \
11763 matches = 0; \
11764 if (matches) \
11765 { \
11766 immtype = (CLASS); \
11767 elsize = (ELSIZE); \
11768 break; \
11769 }
11770
11771 unsigned int i, elsize = 0, idx = 0, n_elts;
11772 unsigned int innersize;
11773 unsigned char bytes[16];
11774 int immtype = -1, matches;
11775 unsigned int invmask = inverse ? 0xff : 0;
11776 bool vector = GET_CODE (op) == CONST_VECTOR;
11777
11778 if (vector)
11779 n_elts = CONST_VECTOR_NUNITS (op);
11780 else
11781 {
11782 n_elts = 1;
11783 if (mode == VOIDmode)
11784 mode = DImode;
11785 }
11786
11787 innersize = GET_MODE_UNIT_SIZE (mode);
11788
11789 /* Vectors of float constants. */
11790 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11791 {
11792 rtx el0 = CONST_VECTOR_ELT (op, 0);
11793
11794 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11795 return -1;
11796
11797 /* FP16 vectors cannot be represented. */
11798 if (GET_MODE_INNER (mode) == HFmode)
11799 return -1;
11800
11801 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11802 are distinct in this context. */
11803 if (!const_vec_duplicate_p (op))
11804 return -1;
11805
11806 if (modconst)
11807 *modconst = CONST_VECTOR_ELT (op, 0);
11808
11809 if (elementwidth)
11810 *elementwidth = 0;
11811
11812 if (el0 == CONST0_RTX (GET_MODE (el0)))
11813 return 19;
11814 else
11815 return 18;
11816 }
11817
11818 /* The tricks done in the code below apply for little-endian vector layout.
11819 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11820 FIXME: Implement logic for big-endian vectors. */
11821 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11822 return -1;
11823
11824 /* Splat vector constant out into a byte vector. */
11825 for (i = 0; i < n_elts; i++)
11826 {
11827 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11828 unsigned HOST_WIDE_INT elpart;
11829
11830 gcc_assert (CONST_INT_P (el));
11831 elpart = INTVAL (el);
11832
11833 for (unsigned int byte = 0; byte < innersize; byte++)
11834 {
11835 bytes[idx++] = (elpart & 0xff) ^ invmask;
11836 elpart >>= BITS_PER_UNIT;
11837 }
11838 }
11839
11840 /* Sanity check. */
11841 gcc_assert (idx == GET_MODE_SIZE (mode));
11842
11843 do
11844 {
11845 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11846 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11847
11848 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11849 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11850
11851 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11852 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11853
11854 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11855 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11856
11857 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11858
11859 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11860
11861 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11862 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11863
11864 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11865 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11866
11867 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11868 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11869
11870 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11871 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11872
11873 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11874
11875 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11876
11877 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11878 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11879
11880 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11881 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11882
11883 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11884 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11885
11886 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11887 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11888
11889 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11890
11891 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11892 && bytes[i] == bytes[(i + 8) % idx]);
11893 }
11894 while (0);
11895
11896 if (immtype == -1)
11897 return -1;
11898
11899 if (elementwidth)
11900 *elementwidth = elsize;
11901
11902 if (modconst)
11903 {
11904 unsigned HOST_WIDE_INT imm = 0;
11905
11906 /* Un-invert bytes of recognized vector, if necessary. */
11907 if (invmask != 0)
11908 for (i = 0; i < idx; i++)
11909 bytes[i] ^= invmask;
11910
11911 if (immtype == 17)
11912 {
11913 /* FIXME: Broken on 32-bit H_W_I hosts. */
11914 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11915
11916 for (i = 0; i < 8; i++)
11917 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11918 << (i * BITS_PER_UNIT);
11919
11920 *modconst = GEN_INT (imm);
11921 }
11922 else
11923 {
11924 unsigned HOST_WIDE_INT imm = 0;
11925
11926 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11927 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11928
11929 *modconst = GEN_INT (imm);
11930 }
11931 }
11932
11933 return immtype;
11934 #undef CHECK
11935 }
11936
11937 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11938 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11939 float elements), and a modified constant (whatever should be output for a
11940 VMOV) in *MODCONST. */
11941
11942 int
11943 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11944 rtx *modconst, int *elementwidth)
11945 {
11946 rtx tmpconst;
11947 int tmpwidth;
11948 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11949
11950 if (retval == -1)
11951 return 0;
11952
11953 if (modconst)
11954 *modconst = tmpconst;
11955
11956 if (elementwidth)
11957 *elementwidth = tmpwidth;
11958
11959 return 1;
11960 }
11961
11962 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11963 the immediate is valid, write a constant suitable for using as an operand
11964 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11965 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11966
11967 int
11968 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11969 rtx *modconst, int *elementwidth)
11970 {
11971 rtx tmpconst;
11972 int tmpwidth;
11973 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11974
11975 if (retval < 0 || retval > 5)
11976 return 0;
11977
11978 if (modconst)
11979 *modconst = tmpconst;
11980
11981 if (elementwidth)
11982 *elementwidth = tmpwidth;
11983
11984 return 1;
11985 }
11986
11987 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11988 the immediate is valid, write a constant suitable for using as an operand
11989 to VSHR/VSHL to *MODCONST and the corresponding element width to
11990    *ELEMENTWIDTH.  ISLEFTSHIFT says whether the immediate is for a left or a
11991    right shift, because the two have different limitations.  */
11992
11993 int
11994 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11995 rtx *modconst, int *elementwidth,
11996 bool isleftshift)
11997 {
11998 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11999 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12000 unsigned HOST_WIDE_INT last_elt = 0;
12001 unsigned HOST_WIDE_INT maxshift;
12002
12003   /* All elements of the vector must be the same; remember that common value.  */
12004 for (i = 0; i < n_elts; i++)
12005 {
12006 rtx el = CONST_VECTOR_ELT (op, i);
12007 unsigned HOST_WIDE_INT elpart;
12008
12009 if (CONST_INT_P (el))
12010 elpart = INTVAL (el);
12011 else if (CONST_DOUBLE_P (el))
12012 return 0;
12013 else
12014 gcc_unreachable ();
12015
12016 if (i != 0 && elpart != last_elt)
12017 return 0;
12018
12019 last_elt = elpart;
12020 }
12021
12022 /* Shift less than element size. */
12023 maxshift = innersize * 8;
12024
12025 if (isleftshift)
12026 {
12027 /* Left shift immediate value can be from 0 to <size>-1. */
12028 if (last_elt >= maxshift)
12029 return 0;
12030 }
12031 else
12032 {
12033 /* Right shift immediate value can be from 1 to <size>. */
12034 if (last_elt == 0 || last_elt > maxshift)
12035 return 0;
12036 }
12037
12038 if (elementwidth)
12039 *elementwidth = innersize * 8;
12040
12041 if (modconst)
12042 *modconst = CONST_VECTOR_ELT (op, 0);
12043
12044 return 1;
12045 }
12046
12047 /* Return a string suitable for output of Neon immediate logic operation
12048 MNEM. */
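/* For instance, with MNEM "vbic", a 32-bit element width and QUAD set, the
   returned template is "vbic.i32\t%q0, %2" (an illustrative example of the
   sprintf calls below).  */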
12049
12050 char *
12051 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12052 int inverse, int quad)
12053 {
12054 int width, is_valid;
12055 static char templ[40];
12056
12057 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12058
12059 gcc_assert (is_valid != 0);
12060
12061 if (quad)
12062 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12063 else
12064 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12065
12066 return templ;
12067 }
12068
12069 /* Return a string suitable for output of Neon immediate shift operation
12070 (VSHR or VSHL) MNEM. */
12071
12072 char *
12073 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12074 machine_mode mode, int quad,
12075 bool isleftshift)
12076 {
12077 int width, is_valid;
12078 static char templ[40];
12079
12080 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12081 gcc_assert (is_valid != 0);
12082
12083 if (quad)
12084 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12085 else
12086 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12087
12088 return templ;
12089 }
12090
12091 /* Output a sequence of pairwise operations to implement a reduction.
12092 NOTE: We do "too much work" here, because pairwise operations work on two
12093    registers-worth of operands in one go.  Unfortunately it does not seem
12094    possible to exploit those extra calculations to do the full operation in fewer steps.
12095 Although all vector elements of the result but the first are ignored, we
12096 actually calculate the same result in each of the elements. An alternative
12097 such as initially loading a vector with zero to use as each of the second
12098 operands would use up an additional register and take an extra instruction,
12099 for no particular gain. */
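/* For example (a sketch of the loop below): reducing a V4SF vector
   {a, b, c, d} with a pairwise add takes two steps, first producing partial
   sums such as {a+b, c+d, a+b, c+d} and then {a+b+c+d, ...}, with the
   wanted result in element 0.  */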
12100
12101 void
12102 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12103 rtx (*reduc) (rtx, rtx, rtx))
12104 {
12105 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12106 rtx tmpsum = op1;
12107
12108 for (i = parts / 2; i >= 1; i /= 2)
12109 {
12110 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12111 emit_insn (reduc (dest, tmpsum, tmpsum));
12112 tmpsum = dest;
12113 }
12114 }
12115
12116 /* If VALS is a vector constant that can be loaded into a register
12117 using VDUP, generate instructions to do so and return an RTX to
12118 assign to the register. Otherwise return NULL_RTX. */
12119
12120 static rtx
12121 neon_vdup_constant (rtx vals)
12122 {
12123 machine_mode mode = GET_MODE (vals);
12124 machine_mode inner_mode = GET_MODE_INNER (mode);
12125 rtx x;
12126
12127 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12128 return NULL_RTX;
12129
12130 if (!const_vec_duplicate_p (vals, &x))
12131 /* The elements are not all the same. We could handle repeating
12132 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12133 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12134 vdup.i16). */
12135 return NULL_RTX;
12136
12137 /* We can load this constant by using VDUP and a constant in a
12138 single ARM register. This will be cheaper than a vector
12139 load. */
12140
12141 x = copy_to_mode_reg (inner_mode, x);
12142 return gen_rtx_VEC_DUPLICATE (mode, x);
12143 }
12144
12145 /* Generate code to load VALS, which is a PARALLEL containing only
12146 constants (for vec_init) or CONST_VECTOR, efficiently into a
12147 register. Returns an RTX to copy into the register, or NULL_RTX
12148    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
12149
12150 rtx
12151 neon_make_constant (rtx vals)
12152 {
12153 machine_mode mode = GET_MODE (vals);
12154 rtx target;
12155 rtx const_vec = NULL_RTX;
12156 int n_elts = GET_MODE_NUNITS (mode);
12157 int n_const = 0;
12158 int i;
12159
12160 if (GET_CODE (vals) == CONST_VECTOR)
12161 const_vec = vals;
12162 else if (GET_CODE (vals) == PARALLEL)
12163 {
12164 /* A CONST_VECTOR must contain only CONST_INTs and
12165 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12166 Only store valid constants in a CONST_VECTOR. */
12167 for (i = 0; i < n_elts; ++i)
12168 {
12169 rtx x = XVECEXP (vals, 0, i);
12170 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12171 n_const++;
12172 }
12173 if (n_const == n_elts)
12174 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12175 }
12176 else
12177 gcc_unreachable ();
12178
12179 if (const_vec != NULL
12180 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12181 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12182 return const_vec;
12183 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12184 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12185 pipeline cycle; creating the constant takes one or two ARM
12186 pipeline cycles. */
12187 return target;
12188 else if (const_vec != NULL_RTX)
12189 /* Load from constant pool. On Cortex-A8 this takes two cycles
12190        (for either double or quad vectors).  We cannot take advantage
12191 of single-cycle VLD1 because we need a PC-relative addressing
12192 mode. */
12193 return const_vec;
12194 else
12195 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12196        We cannot construct an initializer.  */
12197 return NULL_RTX;
12198 }
12199
12200 /* Initialize vector TARGET to VALS. */
12201
12202 void
12203 neon_expand_vector_init (rtx target, rtx vals)
12204 {
12205 machine_mode mode = GET_MODE (target);
12206 machine_mode inner_mode = GET_MODE_INNER (mode);
12207 int n_elts = GET_MODE_NUNITS (mode);
12208 int n_var = 0, one_var = -1;
12209 bool all_same = true;
12210 rtx x, mem;
12211 int i;
12212
12213 for (i = 0; i < n_elts; ++i)
12214 {
12215 x = XVECEXP (vals, 0, i);
12216 if (!CONSTANT_P (x))
12217 ++n_var, one_var = i;
12218
12219 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12220 all_same = false;
12221 }
12222
12223 if (n_var == 0)
12224 {
12225 rtx constant = neon_make_constant (vals);
12226 if (constant != NULL_RTX)
12227 {
12228 emit_move_insn (target, constant);
12229 return;
12230 }
12231 }
12232
12233 /* Splat a single non-constant element if we can. */
12234 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12235 {
12236 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12237 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12238 return;
12239 }
12240
12241 /* One field is non-constant. Load constant then overwrite varying
12242 field. This is more efficient than using the stack. */
12243 if (n_var == 1)
12244 {
12245 rtx copy = copy_rtx (vals);
12246 rtx index = GEN_INT (one_var);
12247
12248 /* Load constant part of vector, substitute neighboring value for
12249 varying element. */
12250 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12251 neon_expand_vector_init (target, copy);
12252
12253 /* Insert variable. */
12254 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12255 switch (mode)
12256 {
12257 case E_V8QImode:
12258 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12259 break;
12260 case E_V16QImode:
12261 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12262 break;
12263 case E_V4HImode:
12264 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12265 break;
12266 case E_V8HImode:
12267 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12268 break;
12269 case E_V2SImode:
12270 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12271 break;
12272 case E_V4SImode:
12273 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12274 break;
12275 case E_V2SFmode:
12276 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12277 break;
12278 case E_V4SFmode:
12279 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12280 break;
12281 case E_V2DImode:
12282 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12283 break;
12284 default:
12285 gcc_unreachable ();
12286 }
12287 return;
12288 }
12289
12290 /* Construct the vector in memory one field at a time
12291 and load the whole vector. */
12292 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12293 for (i = 0; i < n_elts; i++)
12294 emit_move_insn (adjust_address_nv (mem, inner_mode,
12295 i * GET_MODE_SIZE (inner_mode)),
12296 XVECEXP (vals, 0, i));
12297 emit_move_insn (target, mem);
12298 }
12299
12300 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Emit
12301    an error using DESC if it does not.  EXP indicates the source location,
12302    which includes the inlining history for intrinsics.  */
12303
12304 static void
12305 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12306 const_tree exp, const char *desc)
12307 {
12308 HOST_WIDE_INT lane;
12309
12310 gcc_assert (CONST_INT_P (operand));
12311
12312 lane = INTVAL (operand);
12313
12314 if (lane < low || lane >= high)
12315 {
12316 if (exp)
12317 error ("%K%s %wd out of range %wd - %wd",
12318 exp, desc, lane, low, high - 1);
12319 else
12320 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12321 }
12322 }
12323
12324 /* Bounds-check lanes. */
12325
12326 void
12327 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12328 const_tree exp)
12329 {
12330 bounds_check (operand, low, high, exp, "lane");
12331 }
12332
12333 /* Bounds-check constants. */
12334
12335 void
12336 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12337 {
12338 bounds_check (operand, low, high, NULL_TREE, "constant");
12339 }
12340
12341 HOST_WIDE_INT
12342 neon_element_bits (machine_mode mode)
12343 {
12344 return GET_MODE_UNIT_BITSIZE (mode);
12345 }
12346
12347 \f
12348 /* Predicates for `match_operand' and `match_operator'. */
12349
12350 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12351 WB is true if full writeback address modes are allowed and is false
12352 if limited writeback address modes (POST_INC and PRE_DEC) are
12353 allowed. */
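/* By way of example (derived from the checks below, not a formal
   definition): addresses of the form (reg), (post_inc (reg)),
   (pre_dec (reg)) and (plus (reg) (const_int N)) with -1024 < N < 1024 and
   N a multiple of 4 are accepted; PRE_INC and POST_DEC additionally
   require WB.  */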
12354
12355 int
12356 arm_coproc_mem_operand (rtx op, bool wb)
12357 {
12358 rtx ind;
12359
12360 /* Reject eliminable registers. */
12361 if (! (reload_in_progress || reload_completed || lra_in_progress)
12362 && ( reg_mentioned_p (frame_pointer_rtx, op)
12363 || reg_mentioned_p (arg_pointer_rtx, op)
12364 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12365 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12366 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12367 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12368 return FALSE;
12369
12370 /* Constants are converted into offsets from labels. */
12371 if (!MEM_P (op))
12372 return FALSE;
12373
12374 ind = XEXP (op, 0);
12375
12376 if (reload_completed
12377 && (GET_CODE (ind) == LABEL_REF
12378 || (GET_CODE (ind) == CONST
12379 && GET_CODE (XEXP (ind, 0)) == PLUS
12380 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12381 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12382 return TRUE;
12383
12384 /* Match: (mem (reg)). */
12385 if (REG_P (ind))
12386 return arm_address_register_rtx_p (ind, 0);
12387
12388      /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12389 acceptable in any case (subject to verification by
12390 arm_address_register_rtx_p). We need WB to be true to accept
12391 PRE_INC and POST_DEC. */
12392 if (GET_CODE (ind) == POST_INC
12393 || GET_CODE (ind) == PRE_DEC
12394 || (wb
12395 && (GET_CODE (ind) == PRE_INC
12396 || GET_CODE (ind) == POST_DEC)))
12397 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12398
12399 if (wb
12400 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12401 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12402 && GET_CODE (XEXP (ind, 1)) == PLUS
12403 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12404 ind = XEXP (ind, 1);
12405
12406 /* Match:
12407 (plus (reg)
12408 (const)). */
12409 if (GET_CODE (ind) == PLUS
12410 && REG_P (XEXP (ind, 0))
12411 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12412 && CONST_INT_P (XEXP (ind, 1))
12413 && INTVAL (XEXP (ind, 1)) > -1024
12414 && INTVAL (XEXP (ind, 1)) < 1024
12415 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12416 return TRUE;
12417
12418 return FALSE;
12419 }
12420
12421 /* Return TRUE if OP is a memory operand which we can load or store a vector
12422 to/from. TYPE is one of the following values:
12423       0 - Vector load/store (vldr)
12424 1 - Core registers (ldm)
12425 2 - Element/structure loads (vld1)
12426 */
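/* For instance (illustrating the cases handled below): type 2 accepts a
   post-modify-by-register address such as
   (mem (post_modify (reg Rn) (plus (reg Rn) (reg Rm)))), matching the
   VLD1 {..}, [Rn], Rm form, while type 0 also accepts
   (plus (reg) (const_int)) offsets as VLDR does.  */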
12427 int
12428 neon_vector_mem_operand (rtx op, int type, bool strict)
12429 {
12430 rtx ind;
12431
12432 /* Reject eliminable registers. */
12433 if (strict && ! (reload_in_progress || reload_completed)
12434 && (reg_mentioned_p (frame_pointer_rtx, op)
12435 || reg_mentioned_p (arg_pointer_rtx, op)
12436 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12437 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12438 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12439 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12440 return FALSE;
12441
12442 /* Constants are converted into offsets from labels. */
12443 if (!MEM_P (op))
12444 return FALSE;
12445
12446 ind = XEXP (op, 0);
12447
12448 if (reload_completed
12449 && (GET_CODE (ind) == LABEL_REF
12450 || (GET_CODE (ind) == CONST
12451 && GET_CODE (XEXP (ind, 0)) == PLUS
12452 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12453 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12454 return TRUE;
12455
12456 /* Match: (mem (reg)). */
12457 if (REG_P (ind))
12458 return arm_address_register_rtx_p (ind, 0);
12459
12460 /* Allow post-increment with Neon registers. */
12461 if ((type != 1 && GET_CODE (ind) == POST_INC)
12462 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12463 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12464
12465   /* Allow post-increment by register for VLDn.  */
12466 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12467 && GET_CODE (XEXP (ind, 1)) == PLUS
12468 && REG_P (XEXP (XEXP (ind, 1), 1)))
12469 return true;
12470
12471 /* Match:
12472 (plus (reg)
12473 (const)). */
12474 if (type == 0
12475 && GET_CODE (ind) == PLUS
12476 && REG_P (XEXP (ind, 0))
12477 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12478 && CONST_INT_P (XEXP (ind, 1))
12479 && INTVAL (XEXP (ind, 1)) > -1024
12480 /* For quad modes, we restrict the constant offset to be slightly less
12481 than what the instruction format permits. We have no such constraint
12482 on double mode offsets. (This must match arm_legitimate_index_p.) */
12483 && (INTVAL (XEXP (ind, 1))
12484 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12485 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12486 return TRUE;
12487
12488 return FALSE;
12489 }
12490
12491 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12492 type. */
12493 int
12494 neon_struct_mem_operand (rtx op)
12495 {
12496 rtx ind;
12497
12498 /* Reject eliminable registers. */
12499 if (! (reload_in_progress || reload_completed)
12500 && ( reg_mentioned_p (frame_pointer_rtx, op)
12501 || reg_mentioned_p (arg_pointer_rtx, op)
12502 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12503 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12504 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12505 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12506 return FALSE;
12507
12508 /* Constants are converted into offsets from labels. */
12509 if (!MEM_P (op))
12510 return FALSE;
12511
12512 ind = XEXP (op, 0);
12513
12514 if (reload_completed
12515 && (GET_CODE (ind) == LABEL_REF
12516 || (GET_CODE (ind) == CONST
12517 && GET_CODE (XEXP (ind, 0)) == PLUS
12518 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12519 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12520 return TRUE;
12521
12522 /* Match: (mem (reg)). */
12523 if (REG_P (ind))
12524 return arm_address_register_rtx_p (ind, 0);
12525
12526 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12527 if (GET_CODE (ind) == POST_INC
12528 || GET_CODE (ind) == PRE_DEC)
12529 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12530
12531 return FALSE;
12532 }
12533
12534 /* Return true if X is a register that will be eliminated later on. */
12535 int
12536 arm_eliminable_register (rtx x)
12537 {
12538 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12539 || REGNO (x) == ARG_POINTER_REGNUM
12540 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12541 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12542 }
12543
12544 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12545 coprocessor registers. Otherwise return NO_REGS. */
12546
12547 enum reg_class
12548 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12549 {
12550 if (mode == HFmode)
12551 {
12552 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12553 return GENERAL_REGS;
12554 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12555 return NO_REGS;
12556 return GENERAL_REGS;
12557 }
12558
12559 /* The neon move patterns handle all legitimate vector and struct
12560 addresses. */
12561 if (TARGET_NEON
12562 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12563 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12564 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12565 || VALID_NEON_STRUCT_MODE (mode)))
12566 return NO_REGS;
12567
12568 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12569 return NO_REGS;
12570
12571 return GENERAL_REGS;
12572 }
12573
12574 /* Values which must be returned in the most-significant end of the return
12575 register. */
12576
12577 static bool
12578 arm_return_in_msb (const_tree valtype)
12579 {
12580 return (TARGET_AAPCS_BASED
12581 && BYTES_BIG_ENDIAN
12582 && (AGGREGATE_TYPE_P (valtype)
12583 || TREE_CODE (valtype) == COMPLEX_TYPE
12584 || FIXED_POINT_TYPE_P (valtype)));
12585 }
12586
12587 /* Return TRUE if X references a SYMBOL_REF. */
12588 int
12589 symbol_mentioned_p (rtx x)
12590 {
12591 const char * fmt;
12592 int i;
12593
12594 if (GET_CODE (x) == SYMBOL_REF)
12595 return 1;
12596
12597 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12598 are constant offsets, not symbols. */
12599 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12600 return 0;
12601
12602 fmt = GET_RTX_FORMAT (GET_CODE (x));
12603
12604 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12605 {
12606 if (fmt[i] == 'E')
12607 {
12608 int j;
12609
12610 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12611 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12612 return 1;
12613 }
12614 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12615 return 1;
12616 }
12617
12618 return 0;
12619 }
12620
12621 /* Return TRUE if X references a LABEL_REF. */
12622 int
12623 label_mentioned_p (rtx x)
12624 {
12625 const char * fmt;
12626 int i;
12627
12628 if (GET_CODE (x) == LABEL_REF)
12629 return 1;
12630
12631 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12632 instruction, but they are constant offsets, not symbols. */
12633 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12634 return 0;
12635
12636 fmt = GET_RTX_FORMAT (GET_CODE (x));
12637 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12638 {
12639 if (fmt[i] == 'E')
12640 {
12641 int j;
12642
12643 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12644 if (label_mentioned_p (XVECEXP (x, i, j)))
12645 return 1;
12646 }
12647 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12648 return 1;
12649 }
12650
12651 return 0;
12652 }
12653
12654 int
12655 tls_mentioned_p (rtx x)
12656 {
12657 switch (GET_CODE (x))
12658 {
12659 case CONST:
12660 return tls_mentioned_p (XEXP (x, 0));
12661
12662 case UNSPEC:
12663 if (XINT (x, 1) == UNSPEC_TLS)
12664 return 1;
12665
12666 /* Fall through. */
12667 default:
12668 return 0;
12669 }
12670 }
12671
12672 /* Must not copy any rtx that uses a pc-relative address.
12673 Also, disallow copying of load-exclusive instructions that
12674 may appear after splitting of compare-and-swap-style operations
12675 so as to prevent those loops from being transformed away from their
12676 canonical forms (see PR 69904). */
12677
12678 static bool
12679 arm_cannot_copy_insn_p (rtx_insn *insn)
12680 {
12681 /* The tls call insn cannot be copied, as it is paired with a data
12682 word. */
12683 if (recog_memoized (insn) == CODE_FOR_tlscall)
12684 return true;
12685
12686 subrtx_iterator::array_type array;
12687 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12688 {
12689 const_rtx x = *iter;
12690 if (GET_CODE (x) == UNSPEC
12691 && (XINT (x, 1) == UNSPEC_PIC_BASE
12692 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12693 return true;
12694 }
12695
12696 rtx set = single_set (insn);
12697 if (set)
12698 {
12699 rtx src = SET_SRC (set);
12700 if (GET_CODE (src) == ZERO_EXTEND)
12701 src = XEXP (src, 0);
12702
12703 /* Catch the load-exclusive and load-acquire operations. */
12704 if (GET_CODE (src) == UNSPEC_VOLATILE
12705 && (XINT (src, 1) == VUNSPEC_LL
12706 || XINT (src, 1) == VUNSPEC_LAX))
12707 return true;
12708 }
12709 return false;
12710 }
12711
12712 enum rtx_code
12713 minmax_code (rtx x)
12714 {
12715 enum rtx_code code = GET_CODE (x);
12716
12717 switch (code)
12718 {
12719 case SMAX:
12720 return GE;
12721 case SMIN:
12722 return LE;
12723 case UMIN:
12724 return LEU;
12725 case UMAX:
12726 return GEU;
12727 default:
12728 gcc_unreachable ();
12729 }
12730 }
12731
12732 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12733
12734 bool
12735 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12736 int *mask, bool *signed_sat)
12737 {
12738 /* The high bound must be a power of two minus one. */
12739 int log = exact_log2 (INTVAL (hi_bound) + 1);
12740 if (log == -1)
12741 return false;
12742
12743 /* The low bound is either zero (for usat) or one less than the
12744 negation of the high bound (for ssat). */
12745 if (INTVAL (lo_bound) == 0)
12746 {
12747 if (mask)
12748 *mask = log;
12749 if (signed_sat)
12750 *signed_sat = false;
12751
12752 return true;
12753 }
12754
12755 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12756 {
12757 if (mask)
12758 *mask = log + 1;
12759 if (signed_sat)
12760 *signed_sat = true;
12761
12762 return true;
12763 }
12764
12765 return false;
12766 }
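
/* Standalone restatement (not compiled as part of this file) of the bound
   check above, over plain long values rather than rtxes; the function name
   is hypothetical.  Bounds [0, 255] give usat with width 8, and bounds
   [-128, 127] give ssat with width 8.  */
#if 0
#include <stdbool.h>

/* Return true if saturating to [LO, HI] can be done with usat/ssat,
   storing the operand width in *WIDTH and the signedness in *IS_SIGNED.  */
static bool
sat_bounds_ok (long lo, long hi, int *width, bool *is_signed)
{
  long v = hi + 1;
  int n = 0;

  /* HI must be one less than a power of two.  */
  if (v <= 0 || (v & (v - 1)) != 0)
    return false;
  while (v > 1)
    {
      v >>= 1;
      n++;
    }

  if (lo == 0)
    {
      *width = n;		/* usat #n  */
      *is_signed = false;
      return true;
    }
  if (lo == -hi - 1)
    {
      *width = n + 1;		/* ssat #(n + 1)  */
      *is_signed = true;
      return true;
    }
  return false;
}
#endif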
12767
12768 /* Return 1 if memory locations are adjacent. */
12769 int
12770 adjacent_mem_locations (rtx a, rtx b)
12771 {
12772 /* We don't guarantee to preserve the order of these memory refs. */
12773 if (volatile_refs_p (a) || volatile_refs_p (b))
12774 return 0;
12775
12776 if ((REG_P (XEXP (a, 0))
12777 || (GET_CODE (XEXP (a, 0)) == PLUS
12778 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12779 && (REG_P (XEXP (b, 0))
12780 || (GET_CODE (XEXP (b, 0)) == PLUS
12781 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12782 {
12783 HOST_WIDE_INT val0 = 0, val1 = 0;
12784 rtx reg0, reg1;
12785 int val_diff;
12786
12787 if (GET_CODE (XEXP (a, 0)) == PLUS)
12788 {
12789 reg0 = XEXP (XEXP (a, 0), 0);
12790 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12791 }
12792 else
12793 reg0 = XEXP (a, 0);
12794
12795 if (GET_CODE (XEXP (b, 0)) == PLUS)
12796 {
12797 reg1 = XEXP (XEXP (b, 0), 0);
12798 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12799 }
12800 else
12801 reg1 = XEXP (b, 0);
12802
12803 /* Don't accept any offset that will require multiple
12804 instructions to handle, since this would cause the
12805 arith_adjacentmem pattern to output an overlong sequence. */
12806 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12807 return 0;
12808
12809 /* Don't allow an eliminable register: register elimination can make
12810 the offset too large. */
12811 if (arm_eliminable_register (reg0))
12812 return 0;
12813
12814 val_diff = val1 - val0;
12815
12816 if (arm_ld_sched)
12817 {
12818 /* If the target has load delay slots, then there's no benefit
12819 to using an ldm instruction unless the offset is zero and
12820 we are optimizing for size. */
12821 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12822 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12823 && (val_diff == 4 || val_diff == -4));
12824 }
12825
12826 return ((REGNO (reg0) == REGNO (reg1))
12827 && (val_diff == 4 || val_diff == -4));
12828 }
12829
12830 return 0;
12831 }
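
/* Standalone sketch (not compiled as part of this file) of the core
   adjacency rule above, deliberately ignoring the volatile, const_ok_for_op
   and eliminable-register checks: two word accesses can be merged only when
   they share a base register and their offsets differ by exactly one word.
   The helper name is hypothetical.  */
#if 0
#include <stdbool.h>

static bool
word_accesses_adjacent (int base_regno0, long offset0,
			int base_regno1, long offset1)
{
  long diff = offset1 - offset0;
  return base_regno0 == base_regno1 && (diff == 4 || diff == -4);
}
#endif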
12832
12833 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12834 for load operations, false for store operations. CONSECUTIVE is true
12835 if the register numbers in the operation must be consecutive in the register
12836    bank.  RETURN_PC is true if the value is to be loaded into the PC.
12837 The pattern we are trying to match for load is:
12838 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12839 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12840 :
12841 :
12842 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12843 ]
12844 where
12845 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12846 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12847 3. If consecutive is TRUE, then for kth register being loaded,
12848 REGNO (R_dk) = REGNO (R_d0) + k.
12849 The pattern for store is similar. */
12850 bool
12851 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12852 bool consecutive, bool return_pc)
12853 {
12854 HOST_WIDE_INT count = XVECLEN (op, 0);
12855 rtx reg, mem, addr;
12856 unsigned regno;
12857 unsigned first_regno;
12858 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12859 rtx elt;
12860 bool addr_reg_in_reglist = false;
12861 bool update = false;
12862 int reg_increment;
12863 int offset_adj;
12864 int regs_per_val;
12865
12866 /* If not in SImode, then registers must be consecutive
12867 (e.g., VLDM instructions for DFmode). */
12868 gcc_assert ((mode == SImode) || consecutive);
12869 /* Setting return_pc for stores is illegal. */
12870 gcc_assert (!return_pc || load);
12871
12872 /* Set up the increments and the regs per val based on the mode. */
12873 reg_increment = GET_MODE_SIZE (mode);
12874 regs_per_val = reg_increment / 4;
12875 offset_adj = return_pc ? 1 : 0;
12876
12877 if (count <= 1
12878 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12879 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12880 return false;
12881
12882 /* Check if this is a write-back. */
12883 elt = XVECEXP (op, 0, offset_adj);
12884 if (GET_CODE (SET_SRC (elt)) == PLUS)
12885 {
12886 i++;
12887 base = 1;
12888 update = true;
12889
12890 /* The offset adjustment must be the number of registers being
12891 popped times the size of a single register. */
12892 if (!REG_P (SET_DEST (elt))
12893 || !REG_P (XEXP (SET_SRC (elt), 0))
12894 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12895 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12896 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12897 ((count - 1 - offset_adj) * reg_increment))
12898 return false;
12899 }
12900
12901 i = i + offset_adj;
12902 base = base + offset_adj;
12903 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12904 success depends on the type: VLDM can do just one reg,
12905 LDM must do at least two. */
12906 if ((count <= i) && (mode == SImode))
12907 return false;
12908
12909 elt = XVECEXP (op, 0, i - 1);
12910 if (GET_CODE (elt) != SET)
12911 return false;
12912
12913 if (load)
12914 {
12915 reg = SET_DEST (elt);
12916 mem = SET_SRC (elt);
12917 }
12918 else
12919 {
12920 reg = SET_SRC (elt);
12921 mem = SET_DEST (elt);
12922 }
12923
12924 if (!REG_P (reg) || !MEM_P (mem))
12925 return false;
12926
12927 regno = REGNO (reg);
12928 first_regno = regno;
12929 addr = XEXP (mem, 0);
12930 if (GET_CODE (addr) == PLUS)
12931 {
12932 if (!CONST_INT_P (XEXP (addr, 1)))
12933 return false;
12934
12935 offset = INTVAL (XEXP (addr, 1));
12936 addr = XEXP (addr, 0);
12937 }
12938
12939 if (!REG_P (addr))
12940 return false;
12941
12942 /* Don't allow SP to be loaded unless it is also the base register. It
12943 guarantees that SP is reset correctly when an LDM instruction
12944 is interrupted. Otherwise, we might end up with a corrupt stack. */
12945 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12946 return false;
12947
12948 for (; i < count; i++)
12949 {
12950 elt = XVECEXP (op, 0, i);
12951 if (GET_CODE (elt) != SET)
12952 return false;
12953
12954 if (load)
12955 {
12956 reg = SET_DEST (elt);
12957 mem = SET_SRC (elt);
12958 }
12959 else
12960 {
12961 reg = SET_SRC (elt);
12962 mem = SET_DEST (elt);
12963 }
12964
12965 if (!REG_P (reg)
12966 || GET_MODE (reg) != mode
12967 || REGNO (reg) <= regno
12968 || (consecutive
12969 && (REGNO (reg) !=
12970 (unsigned int) (first_regno + regs_per_val * (i - base))))
12971 /* Don't allow SP to be loaded unless it is also the base register. It
12972 guarantees that SP is reset correctly when an LDM instruction
12973 is interrupted. Otherwise, we might end up with a corrupt stack. */
12974 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12975 || !MEM_P (mem)
12976 || GET_MODE (mem) != mode
12977 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12978 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12979 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12980 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12981 offset + (i - base) * reg_increment))
12982 && (!REG_P (XEXP (mem, 0))
12983 || offset + (i - base) * reg_increment != 0)))
12984 return false;
12985
12986 regno = REGNO (reg);
12987 if (regno == REGNO (addr))
12988 addr_reg_in_reglist = true;
12989 }
12990
12991 if (load)
12992 {
12993 if (update && addr_reg_in_reglist)
12994 return false;
12995
12996       /* For Thumb-1, the address register is always modified - either by write-back
12997 or by explicit load. If the pattern does not describe an update,
12998 then the address register must be in the list of loaded registers. */
12999 if (TARGET_THUMB1)
13000 return update || addr_reg_in_reglist;
13001 }
13002
13003 return true;
13004 }
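
/* The write-back branch above requires that the base-register adjustment
   equal the number of transferred registers times the per-register
   increment.  Here is that rule as plain arithmetic (a standalone sketch,
   not compiled as part of this file; the helper name is hypothetical).  */
#if 0
#include <stdbool.h>

static bool
ldm_writeback_amount_ok (long adjust, long count, long offset_adj,
			 long reg_increment)
{
  return adjust == (count - 1 - offset_adj) * reg_increment;
}

/* E.g. an SImode pop of {r4, r5, r6} with write-back advances the base by
   12: ldm_writeback_amount_ok (12, 4, 0, 4) is true, where COUNT == 4
   because it also counts the base-update SET in the parallel.  */
#endif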
13005
13006 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13007 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13008 instruction. ADD_OFFSET is nonzero if the base address register needs
13009 to be modified with an add instruction before we can use it. */
13010
13011 static bool
13012 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13013 int nops, HOST_WIDE_INT add_offset)
13014 {
13015 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13016 if the offset isn't small enough. The reason 2 ldrs are faster
13017 is because these ARMs are able to do more than one cache access
13018 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13019 whilst the ARM8 has a double bandwidth cache. This means that
13020 these cores can do both an instruction fetch and a data fetch in
13021 a single cycle, so the trick of calculating the address into a
13022 scratch register (one of the result regs) and then doing a load
13023 multiple actually becomes slower (and no smaller in code size).
13024 That is the transformation
13025
13026 ldr rd1, [rbase + offset]
13027 ldr rd2, [rbase + offset + 4]
13028
13029 to
13030
13031 add rd1, rbase, offset
13032 ldmia rd1, {rd1, rd2}
13033
13034 produces worse code -- '3 cycles + any stalls on rd2' instead of
13035 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13036 access per cycle, the first sequence could never complete in less
13037 than 6 cycles, whereas the ldm sequence would only take 5 and
13038 would make better use of sequential accesses if not hitting the
13039 cache.
13040
13041 We cheat here and test 'arm_ld_sched' which we currently know to
13042 only be true for the ARM8, ARM9 and StrongARM. If this ever
13043 changes, then the test below needs to be reworked. */
13044 if (nops == 2 && arm_ld_sched && add_offset != 0)
13045 return false;
13046
13047 /* XScale has load-store double instructions, but they have stricter
13048 alignment requirements than load-store multiple, so we cannot
13049 use them.
13050
13051 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13052 the pipeline until completion.
13053
13054 NREGS CYCLES
13055 1 3
13056 2 4
13057 3 5
13058 4 6
13059
13060 An ldr instruction takes 1-3 cycles, but does not block the
13061 pipeline.
13062
13063 NREGS CYCLES
13064 1 1-3
13065 2 2-6
13066 3 3-9
13067 4 4-12
13068
13069 Best case ldr will always win. However, the more ldr instructions
13070 we issue, the less likely we are to be able to schedule them well.
13071 Using ldr instructions also increases code size.
13072
13073 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13074 for counts of 3 or 4 regs. */
13075 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13076 return false;
13077 return true;
13078 }
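
/* The XScale numbers quoted above, as arithmetic (a standalone sketch, not
   compiled as part of this file): an ldm of N registers blocks the pipeline
   for 2 + N cycles, while N separate ldr instructions take between N and
   3 * N cycles without blocking.  For N == 2 the ldm costs 4 cycles against
   a best case of 2 cycles for the ldrs, which is why small counts prefer
   ldr above unless we are optimizing for size.  */
#if 0
static int xscale_ldm_cycles (int nregs)       { return 2 + nregs; }
static int xscale_ldr_cycles_best (int nregs)  { return nregs; }
static int xscale_ldr_cycles_worst (int nregs) { return 3 * nregs; }
#endif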
13079
13080 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13081 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13082    an array ORDER that describes the sequence in which to access the
13083    offsets so that they come out in ascending order.  In this sequence, each
13084 offset must be larger by exactly 4 than the previous one. ORDER[0]
13085 must have been filled in with the lowest offset by the caller.
13086 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13087 we use to verify that ORDER produces an ascending order of registers.
13088 Return true if it was possible to construct such an order, false if
13089 not. */
13090
13091 static bool
13092 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13093 int *unsorted_regs)
13094 {
13095 int i;
13096 for (i = 1; i < nops; i++)
13097 {
13098 int j;
13099
13100 order[i] = order[i - 1];
13101 for (j = 0; j < nops; j++)
13102 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13103 {
13104 /* We must find exactly one offset that is higher than the
13105 previous one by 4. */
13106 if (order[i] != order[i - 1])
13107 return false;
13108 order[i] = j;
13109 }
13110 if (order[i] == order[i - 1])
13111 return false;
13112 /* The register numbers must be ascending. */
13113 if (unsorted_regs != NULL
13114 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13115 return false;
13116 }
13117 return true;
13118 }
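
/* Standalone restatement (not compiled as part of this file) of the loop
   above for experimentation, over plain arrays instead of the peephole
   operands.  ORDER[0] must already index the smallest offset; the remaining
   entries are filled so that consecutive offsets ascend by exactly 4, and,
   when REGS is non-null, so do the corresponding register numbers.  The
   function name is hypothetical.  */
#if 0
#include <stdbool.h>

static bool
offsets_form_run (int nops, const long *offsets, int *order, const int *regs)
{
  for (int i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (int j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    /* More than one candidate offset: give up.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      /* No offset exactly 4 above the previous one: give up.  */
      if (order[i] == order[i - 1])
	return false;
      if (regs && regs[order[i]] <= regs[order[i - 1]])
	return false;
    }
  return true;
}

/* Example: with offsets {8, 0, 4} and order[0] preset to 1, the function
   returns true and leaves order == {1, 2, 0}.  */
#endif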
13119
13120 /* Used to determine in a peephole whether a sequence of load
13121 instructions can be changed into a load-multiple instruction.
13122 NOPS is the number of separate load instructions we are examining. The
13123 first NOPS entries in OPERANDS are the destination registers, the
13124 next NOPS entries are memory operands. If this function is
13125 successful, *BASE is set to the common base register of the memory
13126 accesses; *LOAD_OFFSET is set to the first memory location's offset
13127 from that base register.
13128 REGS is an array filled in with the destination register numbers.
13129    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13130    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13131 the sequence of registers in REGS matches the loads from ascending memory
13132 locations, and the function verifies that the register numbers are
13133 themselves ascending. If CHECK_REGS is false, the register numbers
13134 are stored in the order they are found in the operands. */
13135 static int
13136 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13137 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13138 {
13139 int unsorted_regs[MAX_LDM_STM_OPS];
13140 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13141 int order[MAX_LDM_STM_OPS];
13142 rtx base_reg_rtx = NULL;
13143 int base_reg = -1;
13144 int i, ldm_case;
13145
13146 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13147 easily extended if required. */
13148 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13149
13150 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13151
13152 /* Loop over the operands and check that the memory references are
13153 suitable (i.e. immediate offsets from the same base register). At
13154 the same time, extract the target register, and the memory
13155 offsets. */
13156 for (i = 0; i < nops; i++)
13157 {
13158 rtx reg;
13159 rtx offset;
13160
13161 /* Convert a subreg of a mem into the mem itself. */
13162 if (GET_CODE (operands[nops + i]) == SUBREG)
13163 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13164
13165 gcc_assert (MEM_P (operands[nops + i]));
13166
13167 /* Don't reorder volatile memory references; it doesn't seem worth
13168 looking for the case where the order is ok anyway. */
13169 if (MEM_VOLATILE_P (operands[nops + i]))
13170 return 0;
13171
13172 offset = const0_rtx;
13173
13174 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13175 || (GET_CODE (reg) == SUBREG
13176 && REG_P (reg = SUBREG_REG (reg))))
13177 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13178 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13179 || (GET_CODE (reg) == SUBREG
13180 && REG_P (reg = SUBREG_REG (reg))))
13181 && (CONST_INT_P (offset
13182 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13183 {
13184 if (i == 0)
13185 {
13186 base_reg = REGNO (reg);
13187 base_reg_rtx = reg;
13188 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13189 return 0;
13190 }
13191 else if (base_reg != (int) REGNO (reg))
13192 /* Not addressed from the same base register. */
13193 return 0;
13194
13195 unsorted_regs[i] = (REG_P (operands[i])
13196 ? REGNO (operands[i])
13197 : REGNO (SUBREG_REG (operands[i])));
13198
13199 /* If it isn't an integer register, or if it overwrites the
13200 base register but isn't the last insn in the list, then
13201 we can't do this. */
13202 if (unsorted_regs[i] < 0
13203 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13204 || unsorted_regs[i] > 14
13205 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13206 return 0;
13207
13208 /* Don't allow SP to be loaded unless it is also the base
13209 register. It guarantees that SP is reset correctly when
13210 an LDM instruction is interrupted. Otherwise, we might
13211 end up with a corrupt stack. */
13212 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13213 return 0;
13214
13215 unsorted_offsets[i] = INTVAL (offset);
13216 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13217 order[0] = i;
13218 }
13219 else
13220 /* Not a suitable memory address. */
13221 return 0;
13222 }
13223
13224 /* All the useful information has now been extracted from the
13225 operands into unsorted_regs and unsorted_offsets; additionally,
13226 order[0] has been set to the lowest offset in the list. Sort
13227 the offsets into order, verifying that they are adjacent, and
13228 check that the register numbers are ascending. */
13229 if (!compute_offset_order (nops, unsorted_offsets, order,
13230 check_regs ? unsorted_regs : NULL))
13231 return 0;
13232
13233 if (saved_order)
13234 memcpy (saved_order, order, sizeof order);
13235
13236 if (base)
13237 {
13238 *base = base_reg;
13239
13240 for (i = 0; i < nops; i++)
13241 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13242
13243 *load_offset = unsorted_offsets[order[0]];
13244 }
13245
13246 if (TARGET_THUMB1
13247 && !peep2_reg_dead_p (nops, base_reg_rtx))
13248 return 0;
13249
13250 if (unsorted_offsets[order[0]] == 0)
13251 ldm_case = 1; /* ldmia */
13252 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13253 ldm_case = 2; /* ldmib */
13254 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13255 ldm_case = 3; /* ldmda */
13256 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13257 ldm_case = 4; /* ldmdb */
13258 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13259 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13260 ldm_case = 5;
13261 else
13262 return 0;
13263
13264 if (!multiple_operation_profitable_p (false, nops,
13265 ldm_case == 5
13266 ? unsorted_offsets[order[0]] : 0))
13267 return 0;
13268
13269 return ldm_case;
13270 }
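
/* Standalone sketch (not compiled as part of this file) of the
   addressing-mode selection at the end of the function above, over the
   already-sorted offsets.  FIRST and LAST are the lowest and highest
   offsets; ARM_MODE and OFFSET_ENCODABLE are stand-ins introduced for the
   sketch (for TARGET_ARM and the const_ok_for_arm tests), and case 4
   assumes a 32-bit target.  Returns the same 1..5 codes as above, or 0.  */
#if 0
static int
pick_ldm_case (long first, long last, int arm_mode, int offset_encodable)
{
  if (first == 0)
    return 1;			/* ldmia  */
  if (arm_mode && first == 4)
    return 2;			/* ldmib  */
  if (arm_mode && last == 0)
    return 3;			/* ldmda  */
  if (last == -4)
    return 4;			/* ldmdb  */
  if (offset_encodable)
    return 5;			/* add the offset into a base, then ldmia  */
  return 0;
}
#endif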
13271
13272 /* Used to determine in a peephole whether a sequence of store instructions can
13273 be changed into a store-multiple instruction.
13274 NOPS is the number of separate store instructions we are examining.
13275 NOPS_TOTAL is the total number of instructions recognized by the peephole
13276 pattern.
13277 The first NOPS entries in OPERANDS are the source registers, the next
13278 NOPS entries are memory operands. If this function is successful, *BASE is
13279 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13280 to the first memory location's offset from that base register. REGS is an
13281 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13282 likewise filled with the corresponding rtx's.
13283    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13284 numbers to an ascending order of stores.
13285 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13286 from ascending memory locations, and the function verifies that the register
13287 numbers are themselves ascending. If CHECK_REGS is false, the register
13288 numbers are stored in the order they are found in the operands. */
13289 static int
13290 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13291 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13292 HOST_WIDE_INT *load_offset, bool check_regs)
13293 {
13294 int unsorted_regs[MAX_LDM_STM_OPS];
13295 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13296 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13297 int order[MAX_LDM_STM_OPS];
13298 int base_reg = -1;
13299 rtx base_reg_rtx = NULL;
13300 int i, stm_case;
13301
13302   /* Write-back of the base register is currently only supported for Thumb-1.  */
13303 int base_writeback = TARGET_THUMB1;
13304
13305 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13306 easily extended if required. */
13307 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13308
13309 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13310
13311 /* Loop over the operands and check that the memory references are
13312 suitable (i.e. immediate offsets from the same base register). At
13313 the same time, extract the target register, and the memory
13314 offsets. */
13315 for (i = 0; i < nops; i++)
13316 {
13317 rtx reg;
13318 rtx offset;
13319
13320 /* Convert a subreg of a mem into the mem itself. */
13321 if (GET_CODE (operands[nops + i]) == SUBREG)
13322 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13323
13324 gcc_assert (MEM_P (operands[nops + i]));
13325
13326 /* Don't reorder volatile memory references; it doesn't seem worth
13327 looking for the case where the order is ok anyway. */
13328 if (MEM_VOLATILE_P (operands[nops + i]))
13329 return 0;
13330
13331 offset = const0_rtx;
13332
13333 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13334 || (GET_CODE (reg) == SUBREG
13335 && REG_P (reg = SUBREG_REG (reg))))
13336 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13337 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13338 || (GET_CODE (reg) == SUBREG
13339 && REG_P (reg = SUBREG_REG (reg))))
13340 && (CONST_INT_P (offset
13341 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13342 {
13343 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13344 ? operands[i] : SUBREG_REG (operands[i]));
13345 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13346
13347 if (i == 0)
13348 {
13349 base_reg = REGNO (reg);
13350 base_reg_rtx = reg;
13351 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13352 return 0;
13353 }
13354 else if (base_reg != (int) REGNO (reg))
13355 /* Not addressed from the same base register. */
13356 return 0;
13357
13358 /* If it isn't an integer register, then we can't do this. */
13359 if (unsorted_regs[i] < 0
13360 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13361 /* The effects are unpredictable if the base register is
13362 both updated and stored. */
13363 || (base_writeback && unsorted_regs[i] == base_reg)
13364 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13365 || unsorted_regs[i] > 14)
13366 return 0;
13367
13368 unsorted_offsets[i] = INTVAL (offset);
13369 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13370 order[0] = i;
13371 }
13372 else
13373 /* Not a suitable memory address. */
13374 return 0;
13375 }
13376
13377 /* All the useful information has now been extracted from the
13378 operands into unsorted_regs and unsorted_offsets; additionally,
13379 order[0] has been set to the lowest offset in the list. Sort
13380 the offsets into order, verifying that they are adjacent, and
13381 check that the register numbers are ascending. */
13382 if (!compute_offset_order (nops, unsorted_offsets, order,
13383 check_regs ? unsorted_regs : NULL))
13384 return 0;
13385
13386 if (saved_order)
13387 memcpy (saved_order, order, sizeof order);
13388
13389 if (base)
13390 {
13391 *base = base_reg;
13392
13393 for (i = 0; i < nops; i++)
13394 {
13395 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13396 if (reg_rtxs)
13397 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13398 }
13399
13400 *load_offset = unsorted_offsets[order[0]];
13401 }
13402
13403 if (TARGET_THUMB1
13404 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13405 return 0;
13406
13407 if (unsorted_offsets[order[0]] == 0)
13408 stm_case = 1; /* stmia */
13409 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13410 stm_case = 2; /* stmib */
13411 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13412 stm_case = 3; /* stmda */
13413 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13414 stm_case = 4; /* stmdb */
13415 else
13416 return 0;
13417
13418 if (!multiple_operation_profitable_p (false, nops, 0))
13419 return 0;
13420
13421 return stm_case;
13422 }
13423 \f
13424 /* Routines for use in generating RTL. */
13425
13426 /* Generate a load-multiple instruction. COUNT is the number of loads in
13427 the instruction; REGS and MEMS are arrays containing the operands.
13428 BASEREG is the base register to be used in addressing the memory operands.
13429 WBACK_OFFSET is nonzero if the instruction should update the base
13430 register. */
13431
13432 static rtx
13433 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13434 HOST_WIDE_INT wback_offset)
13435 {
13436 int i = 0, j;
13437 rtx result;
13438
13439 if (!multiple_operation_profitable_p (false, count, 0))
13440 {
13441 rtx seq;
13442
13443 start_sequence ();
13444
13445 for (i = 0; i < count; i++)
13446 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13447
13448 if (wback_offset != 0)
13449 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13450
13451 seq = get_insns ();
13452 end_sequence ();
13453
13454 return seq;
13455 }
13456
13457 result = gen_rtx_PARALLEL (VOIDmode,
13458 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13459 if (wback_offset != 0)
13460 {
13461 XVECEXP (result, 0, 0)
13462 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13463 i = 1;
13464 count++;
13465 }
13466
13467 for (j = 0; i < count; i++, j++)
13468 XVECEXP (result, 0, i)
13469 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13470
13471 return result;
13472 }
13473
13474 /* Generate a store-multiple instruction. COUNT is the number of stores in
13475 the instruction; REGS and MEMS are arrays containing the operands.
13476 BASEREG is the base register to be used in addressing the memory operands.
13477 WBACK_OFFSET is nonzero if the instruction should update the base
13478 register. */
13479
13480 static rtx
13481 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13482 HOST_WIDE_INT wback_offset)
13483 {
13484 int i = 0, j;
13485 rtx result;
13486
13487 if (GET_CODE (basereg) == PLUS)
13488 basereg = XEXP (basereg, 0);
13489
13490 if (!multiple_operation_profitable_p (false, count, 0))
13491 {
13492 rtx seq;
13493
13494 start_sequence ();
13495
13496 for (i = 0; i < count; i++)
13497 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13498
13499 if (wback_offset != 0)
13500 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13501
13502 seq = get_insns ();
13503 end_sequence ();
13504
13505 return seq;
13506 }
13507
13508 result = gen_rtx_PARALLEL (VOIDmode,
13509 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13510 if (wback_offset != 0)
13511 {
13512 XVECEXP (result, 0, 0)
13513 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13514 i = 1;
13515 count++;
13516 }
13517
13518 for (j = 0; i < count; i++, j++)
13519 XVECEXP (result, 0, i)
13520 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13521
13522 return result;
13523 }
13524
13525 /* Generate either a load-multiple or a store-multiple instruction. This
13526 function can be used in situations where we can start with a single MEM
13527 rtx and adjust its address upwards.
13528 COUNT is the number of operations in the instruction, not counting a
13529 possible update of the base register. REGS is an array containing the
13530 register operands.
13531 BASEREG is the base register to be used in addressing the memory operands,
13532 which are constructed from BASEMEM.
13533 WRITE_BACK specifies whether the generated instruction should include an
13534 update of the base register.
13535 OFFSETP is used to pass an offset to and from this function; this offset
13536 is not used when constructing the address (instead BASEMEM should have an
13537    appropriate offset in its address); it is used only for setting
13538    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13539
13540 static rtx
13541 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13542 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13543 {
13544 rtx mems[MAX_LDM_STM_OPS];
13545 HOST_WIDE_INT offset = *offsetp;
13546 int i;
13547
13548 gcc_assert (count <= MAX_LDM_STM_OPS);
13549
13550 if (GET_CODE (basereg) == PLUS)
13551 basereg = XEXP (basereg, 0);
13552
13553 for (i = 0; i < count; i++)
13554 {
13555 rtx addr = plus_constant (Pmode, basereg, i * 4);
13556 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13557 offset += 4;
13558 }
13559
13560 if (write_back)
13561 *offsetp = offset;
13562
13563 if (is_load)
13564 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13565 write_back ? 4 * count : 0);
13566 else
13567 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13568 write_back ? 4 * count : 0);
13569 }
13570
13571 rtx
13572 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13573 rtx basemem, HOST_WIDE_INT *offsetp)
13574 {
13575 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13576 offsetp);
13577 }
13578
13579 rtx
13580 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13581 rtx basemem, HOST_WIDE_INT *offsetp)
13582 {
13583 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13584 offsetp);
13585 }
13586
13587 /* Called from a peephole2 expander to turn a sequence of loads into an
13588 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13589 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13590    is true if we can reorder the registers because their subsequent uses are
13591    commutative.
13592 Returns true iff we could generate a new instruction. */
13593
13594 bool
13595 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13596 {
13597 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13598 rtx mems[MAX_LDM_STM_OPS];
13599 int i, j, base_reg;
13600 rtx base_reg_rtx;
13601 HOST_WIDE_INT offset;
13602 int write_back = FALSE;
13603 int ldm_case;
13604 rtx addr;
13605
13606 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13607 &base_reg, &offset, !sort_regs);
13608
13609 if (ldm_case == 0)
13610 return false;
13611
13612 if (sort_regs)
13613 for (i = 0; i < nops - 1; i++)
13614 for (j = i + 1; j < nops; j++)
13615 if (regs[i] > regs[j])
13616 {
13617 int t = regs[i];
13618 regs[i] = regs[j];
13619 regs[j] = t;
13620 }
13621 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13622
13623 if (TARGET_THUMB1)
13624 {
13625 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13626 gcc_assert (ldm_case == 1 || ldm_case == 5);
13627 write_back = TRUE;
13628 }
13629
13630 if (ldm_case == 5)
13631 {
13632 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13633 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13634 offset = 0;
13635 if (!TARGET_THUMB1)
13636 base_reg_rtx = newbase;
13637 }
13638
13639 for (i = 0; i < nops; i++)
13640 {
13641 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13642 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13643 SImode, addr, 0);
13644 }
13645 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13646 write_back ? offset + i * 4 : 0));
13647 return true;
13648 }
13649
13650 /* Called from a peephole2 expander to turn a sequence of stores into an
13651 STM instruction. OPERANDS are the operands found by the peephole matcher;
13652 NOPS indicates how many separate stores we are trying to combine.
13653 Returns true iff we could generate a new instruction. */
13654
13655 bool
13656 gen_stm_seq (rtx *operands, int nops)
13657 {
13658 int i;
13659 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13660 rtx mems[MAX_LDM_STM_OPS];
13661 int base_reg;
13662 rtx base_reg_rtx;
13663 HOST_WIDE_INT offset;
13664 int write_back = FALSE;
13665 int stm_case;
13666 rtx addr;
13667 bool base_reg_dies;
13668
13669 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13670 mem_order, &base_reg, &offset, true);
13671
13672 if (stm_case == 0)
13673 return false;
13674
13675 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13676
13677 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13678 if (TARGET_THUMB1)
13679 {
13680 gcc_assert (base_reg_dies);
13681 write_back = TRUE;
13682 }
13683
13684 if (stm_case == 5)
13685 {
13686 gcc_assert (base_reg_dies);
13687 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13688 offset = 0;
13689 }
13690
13691 addr = plus_constant (Pmode, base_reg_rtx, offset);
13692
13693 for (i = 0; i < nops; i++)
13694 {
13695 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13696 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13697 SImode, addr, 0);
13698 }
13699 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13700 write_back ? offset + i * 4 : 0));
13701 return true;
13702 }
13703
13704 /* Called from a peephole2 expander to turn a sequence of stores that are
13705 preceded by constant loads into an STM instruction. OPERANDS are the
13706 operands found by the peephole matcher; NOPS indicates how many
13707 separate stores we are trying to combine; there are 2 * NOPS
13708 instructions in the peephole.
13709 Returns true iff we could generate a new instruction. */
13710
13711 bool
13712 gen_const_stm_seq (rtx *operands, int nops)
13713 {
13714 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13715 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13716 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13717 rtx mems[MAX_LDM_STM_OPS];
13718 int base_reg;
13719 rtx base_reg_rtx;
13720 HOST_WIDE_INT offset;
13721 int write_back = FALSE;
13722 int stm_case;
13723 rtx addr;
13724 bool base_reg_dies;
13725 int i, j;
13726 HARD_REG_SET allocated;
13727
13728 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13729 mem_order, &base_reg, &offset, false);
13730
13731 if (stm_case == 0)
13732 return false;
13733
13734 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13735
13736 /* If the same register is used more than once, try to find a free
13737 register. */
13738 CLEAR_HARD_REG_SET (allocated);
13739 for (i = 0; i < nops; i++)
13740 {
13741 for (j = i + 1; j < nops; j++)
13742 if (regs[i] == regs[j])
13743 {
13744 rtx t = peep2_find_free_register (0, nops * 2,
13745 TARGET_THUMB1 ? "l" : "r",
13746 SImode, &allocated);
13747 if (t == NULL_RTX)
13748 return false;
13749 reg_rtxs[i] = t;
13750 regs[i] = REGNO (t);
13751 }
13752 }
13753
13754 /* Compute an ordering that maps the register numbers to an ascending
13755 sequence. */
13756 reg_order[0] = 0;
13757 for (i = 0; i < nops; i++)
13758 if (regs[i] < regs[reg_order[0]])
13759 reg_order[0] = i;
13760
13761 for (i = 1; i < nops; i++)
13762 {
13763 int this_order = reg_order[i - 1];
13764 for (j = 0; j < nops; j++)
13765 if (regs[j] > regs[reg_order[i - 1]]
13766 && (this_order == reg_order[i - 1]
13767 || regs[j] < regs[this_order]))
13768 this_order = j;
13769 reg_order[i] = this_order;
13770 }
13771
13772 /* Ensure that registers that must be live after the instruction end
13773 up with the correct value. */
13774 for (i = 0; i < nops; i++)
13775 {
13776 int this_order = reg_order[i];
13777 if ((this_order != mem_order[i]
13778 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13779 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13780 return false;
13781 }
13782
13783 /* Load the constants. */
13784 for (i = 0; i < nops; i++)
13785 {
13786 rtx op = operands[2 * nops + mem_order[i]];
13787 sorted_regs[i] = regs[reg_order[i]];
13788 emit_move_insn (reg_rtxs[reg_order[i]], op);
13789 }
13790
13791 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13792
13793 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13794 if (TARGET_THUMB1)
13795 {
13796 gcc_assert (base_reg_dies);
13797 write_back = TRUE;
13798 }
13799
13800 if (stm_case == 5)
13801 {
13802 gcc_assert (base_reg_dies);
13803 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13804 offset = 0;
13805 }
13806
13807 addr = plus_constant (Pmode, base_reg_rtx, offset);
13808
13809 for (i = 0; i < nops; i++)
13810 {
13811 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13812 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13813 SImode, addr, 0);
13814 }
13815 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13816 write_back ? offset + i * 4 : 0));
13817 return true;
13818 }
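
/* Standalone restatement (not compiled as part of this file) of the
   ordering loop above: build an index permutation ORDER such that
   REGS[ORDER[0]] < REGS[ORDER[1]] < ..., a selection sort over indices,
   assuming the register numbers are distinct by this point.  The function
   name is hypothetical.  */
#if 0
static void
ascending_index_order (int n, const int *regs, int *order)
{
  order[0] = 0;
  for (int i = 0; i < n; i++)
    if (regs[i] < regs[order[0]])
      order[0] = i;

  for (int i = 1; i < n; i++)
    {
      int best = order[i - 1];
      for (int j = 0; j < n; j++)
	if (regs[j] > regs[order[i - 1]]
	    && (best == order[i - 1] || regs[j] < regs[best]))
	  best = j;
      order[i] = best;
    }
}

/* Example: regs == {5, 3, 7} yields order == {1, 0, 2}.  */
#endif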
13819
13820 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13821 unaligned copies on processors which support unaligned semantics for those
13822 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13823 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13824 An interleave factor of 1 (the minimum) will perform no interleaving.
13825 Load/store multiple are used for aligned addresses where possible. */
13826
13827 static void
13828 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13829 HOST_WIDE_INT length,
13830 unsigned int interleave_factor)
13831 {
13832 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13833 int *regnos = XALLOCAVEC (int, interleave_factor);
13834 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13835 HOST_WIDE_INT i, j;
13836 HOST_WIDE_INT remaining = length, words;
13837 rtx halfword_tmp = NULL, byte_tmp = NULL;
13838 rtx dst, src;
13839 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13840 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13841 HOST_WIDE_INT srcoffset, dstoffset;
13842 HOST_WIDE_INT src_autoinc, dst_autoinc;
13843 rtx mem, addr;
13844
13845 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13846
13847 /* Use hard registers if we have aligned source or destination so we can use
13848 load/store multiple with contiguous registers. */
13849 if (dst_aligned || src_aligned)
13850 for (i = 0; i < interleave_factor; i++)
13851 regs[i] = gen_rtx_REG (SImode, i);
13852 else
13853 for (i = 0; i < interleave_factor; i++)
13854 regs[i] = gen_reg_rtx (SImode);
13855
13856 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13857 src = copy_addr_to_reg (XEXP (srcbase, 0));
13858
13859 srcoffset = dstoffset = 0;
13860
13861 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13862 For copying the last bytes we want to subtract this offset again. */
13863 src_autoinc = dst_autoinc = 0;
13864
13865 for (i = 0; i < interleave_factor; i++)
13866 regnos[i] = i;
13867
13868 /* Copy BLOCK_SIZE_BYTES chunks. */
13869
13870 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13871 {
13872 /* Load words. */
13873 if (src_aligned && interleave_factor > 1)
13874 {
13875 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13876 TRUE, srcbase, &srcoffset));
13877 src_autoinc += UNITS_PER_WORD * interleave_factor;
13878 }
13879 else
13880 {
13881 for (j = 0; j < interleave_factor; j++)
13882 {
13883 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13884 - src_autoinc));
13885 mem = adjust_automodify_address (srcbase, SImode, addr,
13886 srcoffset + j * UNITS_PER_WORD);
13887 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13888 }
13889 srcoffset += block_size_bytes;
13890 }
13891
13892 /* Store words. */
13893 if (dst_aligned && interleave_factor > 1)
13894 {
13895 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13896 TRUE, dstbase, &dstoffset));
13897 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13898 }
13899 else
13900 {
13901 for (j = 0; j < interleave_factor; j++)
13902 {
13903 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13904 - dst_autoinc));
13905 mem = adjust_automodify_address (dstbase, SImode, addr,
13906 dstoffset + j * UNITS_PER_WORD);
13907 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13908 }
13909 dstoffset += block_size_bytes;
13910 }
13911
13912 remaining -= block_size_bytes;
13913 }
13914
13915 /* Copy any whole words left (note these aren't interleaved with any
13916 subsequent halfword/byte load/stores in the interests of simplicity). */
13917
13918 words = remaining / UNITS_PER_WORD;
13919
13920 gcc_assert (words < interleave_factor);
13921
13922 if (src_aligned && words > 1)
13923 {
13924 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13925 &srcoffset));
13926 src_autoinc += UNITS_PER_WORD * words;
13927 }
13928 else
13929 {
13930 for (j = 0; j < words; j++)
13931 {
13932 addr = plus_constant (Pmode, src,
13933 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13934 mem = adjust_automodify_address (srcbase, SImode, addr,
13935 srcoffset + j * UNITS_PER_WORD);
13936 if (src_aligned)
13937 emit_move_insn (regs[j], mem);
13938 else
13939 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13940 }
13941 srcoffset += words * UNITS_PER_WORD;
13942 }
13943
13944 if (dst_aligned && words > 1)
13945 {
13946 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13947 &dstoffset));
13948 dst_autoinc += words * UNITS_PER_WORD;
13949 }
13950 else
13951 {
13952 for (j = 0; j < words; j++)
13953 {
13954 addr = plus_constant (Pmode, dst,
13955 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13956 mem = adjust_automodify_address (dstbase, SImode, addr,
13957 dstoffset + j * UNITS_PER_WORD);
13958 if (dst_aligned)
13959 emit_move_insn (mem, regs[j]);
13960 else
13961 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13962 }
13963 dstoffset += words * UNITS_PER_WORD;
13964 }
13965
13966 remaining -= words * UNITS_PER_WORD;
13967
13968 gcc_assert (remaining < 4);
13969
13970 /* Copy a halfword if necessary. */
13971
13972 if (remaining >= 2)
13973 {
13974 halfword_tmp = gen_reg_rtx (SImode);
13975
13976 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13977 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13978 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13979
13980 /* Either write out immediately, or delay until we've loaded the last
13981 byte, depending on interleave factor. */
13982 if (interleave_factor == 1)
13983 {
13984 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13985 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13986 emit_insn (gen_unaligned_storehi (mem,
13987 gen_lowpart (HImode, halfword_tmp)));
13988 halfword_tmp = NULL;
13989 dstoffset += 2;
13990 }
13991
13992 remaining -= 2;
13993 srcoffset += 2;
13994 }
13995
13996 gcc_assert (remaining < 2);
13997
13998 /* Copy last byte. */
13999
14000 if ((remaining & 1) != 0)
14001 {
14002 byte_tmp = gen_reg_rtx (SImode);
14003
14004 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14005 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14006 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14007
14008 if (interleave_factor == 1)
14009 {
14010 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14011 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14012 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14013 byte_tmp = NULL;
14014 dstoffset++;
14015 }
14016
14017 remaining--;
14018 srcoffset++;
14019 }
14020
14021 /* Store last halfword if we haven't done so already. */
14022
14023 if (halfword_tmp)
14024 {
14025 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14026 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14027 emit_insn (gen_unaligned_storehi (mem,
14028 gen_lowpart (HImode, halfword_tmp)));
14029 dstoffset += 2;
14030 }
14031
14032 /* Likewise for last byte. */
14033
14034 if (byte_tmp)
14035 {
14036 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14037 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14038 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14039 dstoffset++;
14040 }
14041
14042 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14043 }
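
/* Decomposition performed by the function above, as a standalone sketch
   (not compiled as part of this file): a copy of LENGTH bytes with
   INTERLEAVE_FACTOR word registers is split into full blocks, then leftover
   whole words, then at most one halfword and one byte.  UNITS_PER_WORD is
   assumed to be 4 and the helper name is hypothetical.  */
#if 0
#include <stdio.h>

static void
show_copy_plan (long length, int interleave_factor)
{
  long block = 4L * interleave_factor;
  long blocks = length / block;
  long rest = length - blocks * block;
  long words = rest / 4;

  rest -= words * 4;
  printf ("%ld blocks of %ld, %ld words, %d halfword, %d byte\n",
	  blocks, block, words, rest >= 2, (int) (rest & 1));
}

/* E.g. show_copy_plan (23, 2) prints
   "2 blocks of 8, 1 words, 1 halfword, 1 byte".  */
#endif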
14044
14045 /* From mips_adjust_block_mem:
14046
14047 Helper function for doing a loop-based block operation on memory
14048 reference MEM. Each iteration of the loop will operate on LENGTH
14049 bytes of MEM.
14050
14051 Create a new base register for use within the loop and point it to
14052 the start of MEM. Create a new memory reference that uses this
14053 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14054
14055 static void
14056 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14057 rtx *loop_mem)
14058 {
14059 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14060
14061 /* Although the new mem does not refer to a known location,
14062 it does keep up to LENGTH bytes of alignment. */
14063 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14064 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14065 }
14066
14067 /* From mips_block_move_loop:
14068
14069 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14070 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14071 the memory regions do not overlap. */
14072
14073 static void
14074 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14075 unsigned int interleave_factor,
14076 HOST_WIDE_INT bytes_per_iter)
14077 {
14078 rtx src_reg, dest_reg, final_src, test;
14079 HOST_WIDE_INT leftover;
14080
14081 leftover = length % bytes_per_iter;
14082 length -= leftover;
14083
14084 /* Create registers and memory references for use within the loop. */
14085 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14086 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14087
14088 /* Calculate the value that SRC_REG should have after the last iteration of
14089 the loop. */
14090 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14091 0, 0, OPTAB_WIDEN);
14092
14093 /* Emit the start of the loop. */
14094 rtx_code_label *label = gen_label_rtx ();
14095 emit_label (label);
14096
14097 /* Emit the loop body. */
14098 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14099 interleave_factor);
14100
14101 /* Move on to the next block. */
14102 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14103 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14104
14105 /* Emit the loop condition. */
14106 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14107 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14108
14109 /* Mop up any left-over bytes. */
14110 if (leftover)
14111 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14112 }
14113
14114 /* Emit a block move when either the source or destination is unaligned (not
14115 aligned to a four-byte boundary). This may need further tuning depending on
14116 core type, optimize_size setting, etc. */
14117
14118 static int
14119 arm_movmemqi_unaligned (rtx *operands)
14120 {
14121 HOST_WIDE_INT length = INTVAL (operands[2]);
14122
14123 if (optimize_size)
14124 {
14125 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14126 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14127 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14128	 the size of the code if optimizing for size.  We'll use ldm/stm if src_aligned
14129 or dst_aligned though: allow more interleaving in those cases since the
14130 resulting code can be smaller. */
14131 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14132 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14133
14134 if (length > 12)
14135 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14136 interleave_factor, bytes_per_iter);
14137 else
14138 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14139 interleave_factor);
14140 }
14141 else
14142 {
14143 /* Note that the loop created by arm_block_move_unaligned_loop may be
14144 subject to loop unrolling, which makes tuning this condition a little
14145 redundant. */
14146 if (length > 32)
14147 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14148 else
14149 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14150 }
14151
14152 return 1;
14153 }
14154
14155 int
14156 arm_gen_movmemqi (rtx *operands)
14157 {
14158 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14159 HOST_WIDE_INT srcoffset, dstoffset;
14160 rtx src, dst, srcbase, dstbase;
14161 rtx part_bytes_reg = NULL;
14162 rtx mem;
14163
14164 if (!CONST_INT_P (operands[2])
14165 || !CONST_INT_P (operands[3])
14166 || INTVAL (operands[2]) > 64)
14167 return 0;
14168
14169 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14170 return arm_movmemqi_unaligned (operands);
14171
14172 if (INTVAL (operands[3]) & 3)
14173 return 0;
14174
14175 dstbase = operands[0];
14176 srcbase = operands[1];
14177
14178 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14179 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14180
14181 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14182 out_words_to_go = INTVAL (operands[2]) / 4;
14183 last_bytes = INTVAL (operands[2]) & 3;
14184 dstoffset = srcoffset = 0;
14185
14186 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14187 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14188
14189 while (in_words_to_go >= 2)
14190 {
14191 if (in_words_to_go > 4)
14192 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14193 TRUE, srcbase, &srcoffset));
14194 else
14195 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14196 src, FALSE, srcbase,
14197 &srcoffset));
14198
14199 if (out_words_to_go)
14200 {
14201 if (out_words_to_go > 4)
14202 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14203 TRUE, dstbase, &dstoffset));
14204 else if (out_words_to_go != 1)
14205 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14206 out_words_to_go, dst,
14207 (last_bytes == 0
14208 ? FALSE : TRUE),
14209 dstbase, &dstoffset));
14210 else
14211 {
14212 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14213 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14214 if (last_bytes != 0)
14215 {
14216 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14217 dstoffset += 4;
14218 }
14219 }
14220 }
14221
14222 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14223 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14224 }
14225
14226 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14227 if (out_words_to_go)
14228 {
14229 rtx sreg;
14230
14231 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14232 sreg = copy_to_reg (mem);
14233
14234 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14235 emit_move_insn (mem, sreg);
14236 in_words_to_go--;
14237
14238 gcc_assert (!in_words_to_go); /* Sanity check */
14239 }
14240
14241 if (in_words_to_go)
14242 {
14243 gcc_assert (in_words_to_go > 0);
14244
14245 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14246 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14247 }
14248
14249 gcc_assert (!last_bytes || part_bytes_reg);
14250
14251 if (BYTES_BIG_ENDIAN && last_bytes)
14252 {
14253 rtx tmp = gen_reg_rtx (SImode);
14254
14255 /* The bytes we want are in the top end of the word. */
14256 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14257 GEN_INT (8 * (4 - last_bytes))));
14258 part_bytes_reg = tmp;
14259
14260 while (last_bytes)
14261 {
14262 mem = adjust_automodify_address (dstbase, QImode,
14263 plus_constant (Pmode, dst,
14264 last_bytes - 1),
14265 dstoffset + last_bytes - 1);
14266 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14267
14268 if (--last_bytes)
14269 {
14270 tmp = gen_reg_rtx (SImode);
14271 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14272 part_bytes_reg = tmp;
14273 }
14274 }
14275
14276 }
14277 else
14278 {
14279 if (last_bytes > 1)
14280 {
14281 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14282 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14283 last_bytes -= 2;
14284 if (last_bytes)
14285 {
14286 rtx tmp = gen_reg_rtx (SImode);
14287 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14288 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14289 part_bytes_reg = tmp;
14290 dstoffset += 2;
14291 }
14292 }
14293
14294 if (last_bytes)
14295 {
14296 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14297 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14298 }
14299 }
14300
14301 return 1;
14302 }
14303
14304 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14305 by mode size. */
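/* For example, given an SImode MEM at address R this returns an SImode MEM
   at R + 4, with the recorded memory offset advanced by 4 as well.  */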
14306 inline static rtx
14307 next_consecutive_mem (rtx mem)
14308 {
14309 machine_mode mode = GET_MODE (mem);
14310 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14311 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14312
14313 return adjust_automodify_address (mem, mode, addr, offset);
14314 }
14315
14316 /* Copy using LDRD/STRD instructions whenever possible.
14317 Returns true upon success. */
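/* For example, a 14-byte copy with both operands known to be word-aligned is
   typically expanded as one DImode (LDRD/STRD) transfer, one SImode transfer
   and one HImode transfer.  */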
14318 bool
14319 gen_movmem_ldrd_strd (rtx *operands)
14320 {
14321 unsigned HOST_WIDE_INT len;
14322 HOST_WIDE_INT align;
14323 rtx src, dst, base;
14324 rtx reg0;
14325 bool src_aligned, dst_aligned;
14326 bool src_volatile, dst_volatile;
14327
14328 gcc_assert (CONST_INT_P (operands[2]));
14329 gcc_assert (CONST_INT_P (operands[3]));
14330
14331 len = UINTVAL (operands[2]);
14332 if (len > 64)
14333 return false;
14334
14335 /* Maximum alignment we can assume for both src and dst buffers. */
14336 align = INTVAL (operands[3]);
14337
14338 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14339 return false;
14340
14341 /* Place src and dst addresses in registers
14342 and update the corresponding mem rtx. */
14343 dst = operands[0];
14344 dst_volatile = MEM_VOLATILE_P (dst);
14345 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14346 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14347 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14348
14349 src = operands[1];
14350 src_volatile = MEM_VOLATILE_P (src);
14351 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14352 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14353 src = adjust_automodify_address (src, VOIDmode, base, 0);
14354
14355 if (!unaligned_access && !(src_aligned && dst_aligned))
14356 return false;
14357
14358 if (src_volatile || dst_volatile)
14359 return false;
14360
14361 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14362 if (!(dst_aligned || src_aligned))
14363 return arm_gen_movmemqi (operands);
14364
14365 /* If either src or dst is unaligned, we'll be accessing it as pairs
14366 of unaligned SImode accesses.  Otherwise we can generate DImode
14367 ldrd/strd instructions.  */
14368 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14369 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14370
14371 while (len >= 8)
14372 {
14373 len -= 8;
14374 reg0 = gen_reg_rtx (DImode);
14375 rtx low_reg = NULL_RTX;
14376 rtx hi_reg = NULL_RTX;
14377
14378 if (!src_aligned || !dst_aligned)
14379 {
14380 low_reg = gen_lowpart (SImode, reg0);
14381 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14382 }
14383 if (src_aligned)
14384 emit_move_insn (reg0, src);
14385 else
14386 {
14387 emit_insn (gen_unaligned_loadsi (low_reg, src));
14388 src = next_consecutive_mem (src);
14389 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14390 }
14391
14392 if (dst_aligned)
14393 emit_move_insn (dst, reg0);
14394 else
14395 {
14396 emit_insn (gen_unaligned_storesi (dst, low_reg));
14397 dst = next_consecutive_mem (dst);
14398 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14399 }
14400
14401 src = next_consecutive_mem (src);
14402 dst = next_consecutive_mem (dst);
14403 }
14404
14405 gcc_assert (len < 8);
14406 if (len >= 4)
14407 {
14408 /* At least a word but less than a double-word left to copy.  Copy one word.  */
14409 reg0 = gen_reg_rtx (SImode);
14410 src = adjust_address (src, SImode, 0);
14411 dst = adjust_address (dst, SImode, 0);
14412 if (src_aligned)
14413 emit_move_insn (reg0, src);
14414 else
14415 emit_insn (gen_unaligned_loadsi (reg0, src));
14416
14417 if (dst_aligned)
14418 emit_move_insn (dst, reg0);
14419 else
14420 emit_insn (gen_unaligned_storesi (dst, reg0));
14421
14422 src = next_consecutive_mem (src);
14423 dst = next_consecutive_mem (dst);
14424 len -= 4;
14425 }
14426
14427 if (len == 0)
14428 return true;
14429
14430 /* Copy the remaining bytes. */
14431 if (len >= 2)
14432 {
14433 dst = adjust_address (dst, HImode, 0);
14434 src = adjust_address (src, HImode, 0);
14435 reg0 = gen_reg_rtx (SImode);
14436 if (src_aligned)
14437 emit_insn (gen_zero_extendhisi2 (reg0, src));
14438 else
14439 emit_insn (gen_unaligned_loadhiu (reg0, src));
14440
14441 if (dst_aligned)
14442 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14443 else
14444 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14445
14446 src = next_consecutive_mem (src);
14447 dst = next_consecutive_mem (dst);
14448 if (len == 2)
14449 return true;
14450 }
14451
14452 dst = adjust_address (dst, QImode, 0);
14453 src = adjust_address (src, QImode, 0);
14454 reg0 = gen_reg_rtx (QImode);
14455 emit_move_insn (reg0, src);
14456 emit_move_insn (dst, reg0);
14457 return true;
14458 }
14459
14460 /* Select a dominance comparison mode if possible for a test of the general
14461 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14462 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14463 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14464 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14465 In all cases OP will be either EQ or NE, but we don't need to know which
14466 here. If we are unable to support a dominance comparison we return
14467 CC mode. This will then fail to match for the RTL expressions that
14468 generate this call. */
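/* For example, combining an EQ test with a GE test under DOM_CC_X_OR_Y
   (i.e. X || Y) selects CC_DGEmode below.  */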
14469 machine_mode
14470 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14471 {
14472 enum rtx_code cond1, cond2;
14473 int swapped = 0;
14474
14475 /* Currently we will probably get the wrong result if the individual
14476 comparisons are not simple. This also ensures that it is safe to
14477 reverse a comparison if necessary. */
14478 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14479 != CCmode)
14480 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14481 != CCmode))
14482 return CCmode;
14483
14484 /* The if_then_else variant of this tests the second condition if the
14485 first passes, but is true if the first fails. Reverse the first
14486 condition to get a true "inclusive-or" expression. */
14487 if (cond_or == DOM_CC_NX_OR_Y)
14488 cond1 = reverse_condition (cond1);
14489
14490 /* If the comparisons are not equal, and one doesn't dominate the other,
14491 then we can't do this. */
14492 if (cond1 != cond2
14493 && !comparison_dominates_p (cond1, cond2)
14494 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14495 return CCmode;
14496
14497 if (swapped)
14498 std::swap (cond1, cond2);
14499
14500 switch (cond1)
14501 {
14502 case EQ:
14503 if (cond_or == DOM_CC_X_AND_Y)
14504 return CC_DEQmode;
14505
14506 switch (cond2)
14507 {
14508 case EQ: return CC_DEQmode;
14509 case LE: return CC_DLEmode;
14510 case LEU: return CC_DLEUmode;
14511 case GE: return CC_DGEmode;
14512 case GEU: return CC_DGEUmode;
14513 default: gcc_unreachable ();
14514 }
14515
14516 case LT:
14517 if (cond_or == DOM_CC_X_AND_Y)
14518 return CC_DLTmode;
14519
14520 switch (cond2)
14521 {
14522 case LT:
14523 return CC_DLTmode;
14524 case LE:
14525 return CC_DLEmode;
14526 case NE:
14527 return CC_DNEmode;
14528 default:
14529 gcc_unreachable ();
14530 }
14531
14532 case GT:
14533 if (cond_or == DOM_CC_X_AND_Y)
14534 return CC_DGTmode;
14535
14536 switch (cond2)
14537 {
14538 case GT:
14539 return CC_DGTmode;
14540 case GE:
14541 return CC_DGEmode;
14542 case NE:
14543 return CC_DNEmode;
14544 default:
14545 gcc_unreachable ();
14546 }
14547
14548 case LTU:
14549 if (cond_or == DOM_CC_X_AND_Y)
14550 return CC_DLTUmode;
14551
14552 switch (cond2)
14553 {
14554 case LTU:
14555 return CC_DLTUmode;
14556 case LEU:
14557 return CC_DLEUmode;
14558 case NE:
14559 return CC_DNEmode;
14560 default:
14561 gcc_unreachable ();
14562 }
14563
14564 case GTU:
14565 if (cond_or == DOM_CC_X_AND_Y)
14566 return CC_DGTUmode;
14567
14568 switch (cond2)
14569 {
14570 case GTU:
14571 return CC_DGTUmode;
14572 case GEU:
14573 return CC_DGEUmode;
14574 case NE:
14575 return CC_DNEmode;
14576 default:
14577 gcc_unreachable ();
14578 }
14579
14580 /* The remaining cases only occur when both comparisons are the
14581 same. */
14582 case NE:
14583 gcc_assert (cond1 == cond2);
14584 return CC_DNEmode;
14585
14586 case LE:
14587 gcc_assert (cond1 == cond2);
14588 return CC_DLEmode;
14589
14590 case GE:
14591 gcc_assert (cond1 == cond2);
14592 return CC_DGEmode;
14593
14594 case LEU:
14595 gcc_assert (cond1 == cond2);
14596 return CC_DLEUmode;
14597
14598 case GEU:
14599 gcc_assert (cond1 == cond2);
14600 return CC_DGEUmode;
14601
14602 default:
14603 gcc_unreachable ();
14604 }
14605 }
14606
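/* Return the CC mode to be used for comparing X and Y with comparison code
   OP: CCFPmode/CCFPEmode for floating-point compares, one of the special CC
   modes (CC_SWP, CC_Z, CC_N, CC_NOOV, CC_C, CC_CZ, CC_NCV or a dominance
   mode) when only certain flags are valid or the operands must be swapped,
   and plain CCmode otherwise.  */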
14607 machine_mode
14608 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14609 {
14610 /* All floating point compares return CCFP if it is an equality
14611 comparison, and CCFPE otherwise. */
14612 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14613 {
14614 switch (op)
14615 {
14616 case EQ:
14617 case NE:
14618 case UNORDERED:
14619 case ORDERED:
14620 case UNLT:
14621 case UNLE:
14622 case UNGT:
14623 case UNGE:
14624 case UNEQ:
14625 case LTGT:
14626 return CCFPmode;
14627
14628 case LT:
14629 case LE:
14630 case GT:
14631 case GE:
14632 return CCFPEmode;
14633
14634 default:
14635 gcc_unreachable ();
14636 }
14637 }
14638
14639 /* A compare with a shifted operand. Because of canonicalization, the
14640 comparison will have to be swapped when we emit the assembler. */
14641 if (GET_MODE (y) == SImode
14642 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14643 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14644 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14645 || GET_CODE (x) == ROTATERT))
14646 return CC_SWPmode;
14647
14648 /* This operation is performed swapped, but since we only rely on the Z
14649 flag we don't need an additional mode. */
14650 if (GET_MODE (y) == SImode
14651 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14652 && GET_CODE (x) == NEG
14653 && (op == EQ || op == NE))
14654 return CC_Zmode;
14655
14656 /* This is a special case that is used by combine to allow a
14657 comparison of a shifted byte load to be split into a zero-extend
14658 followed by a comparison of the shifted integer (only valid for
14659 equalities and unsigned inequalities). */
14660 if (GET_MODE (x) == SImode
14661 && GET_CODE (x) == ASHIFT
14662 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14663 && GET_CODE (XEXP (x, 0)) == SUBREG
14664 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14665 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14666 && (op == EQ || op == NE
14667 || op == GEU || op == GTU || op == LTU || op == LEU)
14668 && CONST_INT_P (y))
14669 return CC_Zmode;
14670
14671 /* A construct for a conditional compare, if the false arm contains
14672 0, then both conditions must be true, otherwise either condition
14673 must be true. Not all conditions are possible, so CCmode is
14674 returned if it can't be done. */
14675 if (GET_CODE (x) == IF_THEN_ELSE
14676 && (XEXP (x, 2) == const0_rtx
14677 || XEXP (x, 2) == const1_rtx)
14678 && COMPARISON_P (XEXP (x, 0))
14679 && COMPARISON_P (XEXP (x, 1)))
14680 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14681 INTVAL (XEXP (x, 2)));
14682
14683 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14684 if (GET_CODE (x) == AND
14685 && (op == EQ || op == NE)
14686 && COMPARISON_P (XEXP (x, 0))
14687 && COMPARISON_P (XEXP (x, 1)))
14688 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14689 DOM_CC_X_AND_Y);
14690
14691 if (GET_CODE (x) == IOR
14692 && (op == EQ || op == NE)
14693 && COMPARISON_P (XEXP (x, 0))
14694 && COMPARISON_P (XEXP (x, 1)))
14695 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14696 DOM_CC_X_OR_Y);
14697
14698 /* An operation (on Thumb) where we want to test for a single bit.
14699 This is done by shifting that bit up into the top bit of a
14700 scratch register; we can then branch on the sign bit. */
14701 if (TARGET_THUMB1
14702 && GET_MODE (x) == SImode
14703 && (op == EQ || op == NE)
14704 && GET_CODE (x) == ZERO_EXTRACT
14705 && XEXP (x, 1) == const1_rtx)
14706 return CC_Nmode;
14707
14708 /* For an operation that sets the condition codes as a side-effect, the
14709 V flag is not set correctly, so we can only use comparisons where
14710 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14711 instead.) */
14712 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14713 if (GET_MODE (x) == SImode
14714 && y == const0_rtx
14715 && (op == EQ || op == NE || op == LT || op == GE)
14716 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14717 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14718 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14719 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14720 || GET_CODE (x) == LSHIFTRT
14721 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14722 || GET_CODE (x) == ROTATERT
14723 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14724 return CC_NOOVmode;
14725
14726 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14727 return CC_Zmode;
14728
14729 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14730 && GET_CODE (x) == PLUS
14731 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14732 return CC_Cmode;
14733
14734 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14735 {
14736 switch (op)
14737 {
14738 case EQ:
14739 case NE:
14740 /* A DImode comparison against zero can be implemented by
14741 or'ing the two halves together. */
14742 if (y == const0_rtx)
14743 return CC_Zmode;
14744
14745 /* We can do an equality test in three Thumb instructions. */
14746 if (!TARGET_32BIT)
14747 return CC_Zmode;
14748
14749 /* FALLTHROUGH */
14750
14751 case LTU:
14752 case LEU:
14753 case GTU:
14754 case GEU:
14755 /* DImode unsigned comparisons can be implemented by cmp +
14756 cmpeq without a scratch register. Not worth doing in
14757 Thumb-2. */
14758 if (TARGET_32BIT)
14759 return CC_CZmode;
14760
14761 /* FALLTHROUGH */
14762
14763 case LT:
14764 case LE:
14765 case GT:
14766 case GE:
14767 /* DImode signed and unsigned comparisons can be implemented
14768 by cmp + sbcs with a scratch register, but that does not
14769 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14770 gcc_assert (op != EQ && op != NE);
14771 return CC_NCVmode;
14772
14773 default:
14774 gcc_unreachable ();
14775 }
14776 }
14777
14778 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14779 return GET_MODE (x);
14780
14781 return CCmode;
14782 }
14783
14784 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14785 return the rtx for the CC register in the proper mode.  SCRATCH is an SImode
14786 scratch register that may be needed for DImode comparisons after reload.  */
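/* For example, a DImode equality against a non-zero operand uses CC_CZmode
   (cmp + cmpeq) on 32-bit targets, while on Thumb-1 the operands are first
   XORed and the result compared against zero in CC_Zmode, with an SImode
   scratch clobber attached to the compare.  */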
14787 rtx
14788 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14789 {
14790 machine_mode mode;
14791 rtx cc_reg;
14792 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14793
14794 /* We might have X as a constant, Y as a register because of the predicates
14795 used for cmpdi. If so, force X to a register here. */
14796 if (dimode_comparison && !REG_P (x))
14797 x = force_reg (DImode, x);
14798
14799 mode = SELECT_CC_MODE (code, x, y);
14800 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14801
14802 if (dimode_comparison
14803 && mode != CC_CZmode)
14804 {
14805 rtx clobber, set;
14806
14807 /* To compare two non-zero values for equality, XOR them and
14808 then compare against zero. Not used for ARM mode; there
14809 CC_CZmode is cheaper. */
14810 if (mode == CC_Zmode && y != const0_rtx)
14811 {
14812 gcc_assert (!reload_completed);
14813 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14814 y = const0_rtx;
14815 }
14816
14817 /* A scratch register is required. */
14818 if (reload_completed)
14819 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14820 else
14821 scratch = gen_rtx_SCRATCH (SImode);
14822
14823 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14824 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14825 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14826 }
14827 else
14828 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14829
14830 return cc_reg;
14831 }
14832
14833 /* Generate a sequence of insns that will compute the correct return
14834 address mask, depending on the physical architecture that the program
14835 is running on. */
14836 rtx
14837 arm_gen_return_addr_mask (void)
14838 {
14839 rtx reg = gen_reg_rtx (Pmode);
14840
14841 emit_insn (gen_return_addr_mask (reg));
14842 return reg;
14843 }
14844
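/* Handle loading a half-word from memory during reload when it cannot be
   done directly: synthesize the HImode load as two zero-extended byte loads
   that are then combined with a shift and an IOR, honouring BYTES_BIG_ENDIAN.
   Operands[0] is the destination, operands[1] the (possibly spilled) HImode
   source and operands[2] a DImode scratch, one half of which is guaranteed
   not to overlap the output.  */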
14845 void
14846 arm_reload_in_hi (rtx *operands)
14847 {
14848 rtx ref = operands[1];
14849 rtx base, scratch;
14850 HOST_WIDE_INT offset = 0;
14851
14852 if (GET_CODE (ref) == SUBREG)
14853 {
14854 offset = SUBREG_BYTE (ref);
14855 ref = SUBREG_REG (ref);
14856 }
14857
14858 if (REG_P (ref))
14859 {
14860 /* We have a pseudo which has been spilt onto the stack; there
14861 are two cases here: the first where there is a simple
14862 stack-slot replacement and a second where the stack-slot is
14863 out of range, or is used as a subreg. */
14864 if (reg_equiv_mem (REGNO (ref)))
14865 {
14866 ref = reg_equiv_mem (REGNO (ref));
14867 base = find_replacement (&XEXP (ref, 0));
14868 }
14869 else
14870 /* The slot is out of range, or was dressed up in a SUBREG. */
14871 base = reg_equiv_address (REGNO (ref));
14872
14873 /* PR 62554: If there is no equivalent memory location then just move
14874 the value as an SImode register move. This happens when the target
14875 architecture variant does not have an HImode register move. */
14876 if (base == NULL)
14877 {
14878 gcc_assert (REG_P (operands[0]));
14879 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14880 gen_rtx_SUBREG (SImode, ref, 0)));
14881 return;
14882 }
14883 }
14884 else
14885 base = find_replacement (&XEXP (ref, 0));
14886
14887 /* Handle the case where the address is too complex to be offset by 1. */
14888 if (GET_CODE (base) == MINUS
14889 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14890 {
14891 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14892
14893 emit_set_insn (base_plus, base);
14894 base = base_plus;
14895 }
14896 else if (GET_CODE (base) == PLUS)
14897 {
14898 /* The addend must be CONST_INT, or we would have dealt with it above. */
14899 HOST_WIDE_INT hi, lo;
14900
14901 offset += INTVAL (XEXP (base, 1));
14902 base = XEXP (base, 0);
14903
14904 /* Rework the address into a legal sequence of insns. */
14905 /* Valid range for lo is -4095 -> 4095 */
14906 lo = (offset >= 0
14907 ? (offset & 0xfff)
14908 : -((-offset) & 0xfff));
14909
14910 /* Corner case, if lo is the max offset then we would be out of range
14911 once we have added the additional 1 below, so bump the msb into the
14912 pre-loading insn(s). */
14913 if (lo == 4095)
14914 lo &= 0x7ff;
14915
14916 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14917 ^ (HOST_WIDE_INT) 0x80000000)
14918 - (HOST_WIDE_INT) 0x80000000);
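/* For example, an offset of 4200 splits as lo = 4200 & 0xfff = 104 and
   hi = 4096, so the base is bumped by 4096 once and the two byte loads
   below use offsets 104 and 105.  */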
14919
14920 gcc_assert (hi + lo == offset);
14921
14922 if (hi != 0)
14923 {
14924 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14925
14926 /* Get the base address; addsi3 knows how to handle constants
14927 that require more than one insn. */
14928 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14929 base = base_plus;
14930 offset = lo;
14931 }
14932 }
14933
14934 /* Operands[2] may overlap operands[0] (though it won't overlap
14935 operands[1]); that's why we asked for a DImode reg -- so that we
14936 can use the half that does not overlap.  */
14937 if (REGNO (operands[2]) == REGNO (operands[0]))
14938 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14939 else
14940 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14941
14942 emit_insn (gen_zero_extendqisi2 (scratch,
14943 gen_rtx_MEM (QImode,
14944 plus_constant (Pmode, base,
14945 offset))));
14946 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14947 gen_rtx_MEM (QImode,
14948 plus_constant (Pmode, base,
14949 offset + 1))));
14950 if (!BYTES_BIG_ENDIAN)
14951 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14952 gen_rtx_IOR (SImode,
14953 gen_rtx_ASHIFT
14954 (SImode,
14955 gen_rtx_SUBREG (SImode, operands[0], 0),
14956 GEN_INT (8)),
14957 scratch));
14958 else
14959 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14960 gen_rtx_IOR (SImode,
14961 gen_rtx_ASHIFT (SImode, scratch,
14962 GEN_INT (8)),
14963 gen_rtx_SUBREG (SImode, operands[0], 0)));
14964 }
14965
14966 /* Handle storing a half-word to memory during reload by synthesizing as two
14967 byte stores. Take care not to clobber the input values until after we
14968 have moved them somewhere safe. This code assumes that if the DImode
14969 scratch in operands[2] overlaps either the input value or output address
14970 in some way, then that value must die in this insn (we absolutely need
14971 two scratch registers for some corner cases). */
14972 void
14973 arm_reload_out_hi (rtx *operands)
14974 {
14975 rtx ref = operands[0];
14976 rtx outval = operands[1];
14977 rtx base, scratch;
14978 HOST_WIDE_INT offset = 0;
14979
14980 if (GET_CODE (ref) == SUBREG)
14981 {
14982 offset = SUBREG_BYTE (ref);
14983 ref = SUBREG_REG (ref);
14984 }
14985
14986 if (REG_P (ref))
14987 {
14988 /* We have a pseudo which has been spilt onto the stack; there
14989 are two cases here: the first where there is a simple
14990 stack-slot replacement and a second where the stack-slot is
14991 out of range, or is used as a subreg. */
14992 if (reg_equiv_mem (REGNO (ref)))
14993 {
14994 ref = reg_equiv_mem (REGNO (ref));
14995 base = find_replacement (&XEXP (ref, 0));
14996 }
14997 else
14998 /* The slot is out of range, or was dressed up in a SUBREG. */
14999 base = reg_equiv_address (REGNO (ref));
15000
15001 /* PR 62254: If there is no equivalent memory location then just move
15002 the value as an SImode register move. This happens when the target
15003 architecture variant does not have an HImode register move. */
15004 if (base == NULL)
15005 {
15006 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15007
15008 if (REG_P (outval))
15009 {
15010 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15011 gen_rtx_SUBREG (SImode, outval, 0)));
15012 }
15013 else /* SUBREG_P (outval) */
15014 {
15015 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15016 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15017 SUBREG_REG (outval)));
15018 else
15019 /* FIXME: Handle other cases ? */
15020 gcc_unreachable ();
15021 }
15022 return;
15023 }
15024 }
15025 else
15026 base = find_replacement (&XEXP (ref, 0));
15027
15028 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15029
15030 /* Handle the case where the address is too complex to be offset by 1. */
15031 if (GET_CODE (base) == MINUS
15032 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15033 {
15034 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15035
15036 /* Be careful not to destroy OUTVAL. */
15037 if (reg_overlap_mentioned_p (base_plus, outval))
15038 {
15039 /* Updating base_plus might destroy outval, see if we can
15040 swap the scratch and base_plus. */
15041 if (!reg_overlap_mentioned_p (scratch, outval))
15042 std::swap (scratch, base_plus);
15043 else
15044 {
15045 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15046
15047 /* Be conservative and copy OUTVAL into the scratch now; this
15048 should only be necessary if outval is a subreg
15049 of something larger than a word. */
15050 /* XXX Might this clobber base? I can't see how it can,
15051 since scratch is known to overlap with OUTVAL, and
15052 must be wider than a word. */
15053 emit_insn (gen_movhi (scratch_hi, outval));
15054 outval = scratch_hi;
15055 }
15056 }
15057
15058 emit_set_insn (base_plus, base);
15059 base = base_plus;
15060 }
15061 else if (GET_CODE (base) == PLUS)
15062 {
15063 /* The addend must be CONST_INT, or we would have dealt with it above. */
15064 HOST_WIDE_INT hi, lo;
15065
15066 offset += INTVAL (XEXP (base, 1));
15067 base = XEXP (base, 0);
15068
15069 /* Rework the address into a legal sequence of insns. */
15070 /* Valid range for lo is -4095 -> 4095 */
15071 lo = (offset >= 0
15072 ? (offset & 0xfff)
15073 : -((-offset) & 0xfff));
15074
15075 /* Corner case, if lo is the max offset then we would be out of range
15076 once we have added the additional 1 below, so bump the msb into the
15077 pre-loading insn(s). */
15078 if (lo == 4095)
15079 lo &= 0x7ff;
15080
15081 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15082 ^ (HOST_WIDE_INT) 0x80000000)
15083 - (HOST_WIDE_INT) 0x80000000);
15084
15085 gcc_assert (hi + lo == offset);
15086
15087 if (hi != 0)
15088 {
15089 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15090
15091 /* Be careful not to destroy OUTVAL. */
15092 if (reg_overlap_mentioned_p (base_plus, outval))
15093 {
15094 /* Updating base_plus might destroy outval, see if we
15095 can swap the scratch and base_plus. */
15096 if (!reg_overlap_mentioned_p (scratch, outval))
15097 std::swap (scratch, base_plus);
15098 else
15099 {
15100 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15101
15102 /* Be conservative and copy outval into scratch now; this
15103 should only be necessary if outval is a
15104 subreg of something larger than a word. */
15105 /* XXX Might this clobber base? I can't see how it
15106 can, since scratch is known to overlap with
15107 outval. */
15108 emit_insn (gen_movhi (scratch_hi, outval));
15109 outval = scratch_hi;
15110 }
15111 }
15112
15113 /* Get the base address; addsi3 knows how to handle constants
15114 that require more than one insn. */
15115 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15116 base = base_plus;
15117 offset = lo;
15118 }
15119 }
15120
15121 if (BYTES_BIG_ENDIAN)
15122 {
15123 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15124 plus_constant (Pmode, base,
15125 offset + 1)),
15126 gen_lowpart (QImode, outval)));
15127 emit_insn (gen_lshrsi3 (scratch,
15128 gen_rtx_SUBREG (SImode, outval, 0),
15129 GEN_INT (8)));
15130 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15131 offset)),
15132 gen_lowpart (QImode, scratch)));
15133 }
15134 else
15135 {
15136 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15137 offset)),
15138 gen_lowpart (QImode, outval)));
15139 emit_insn (gen_lshrsi3 (scratch,
15140 gen_rtx_SUBREG (SImode, outval, 0),
15141 GEN_INT (8)));
15142 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15143 plus_constant (Pmode, base,
15144 offset + 1)),
15145 gen_lowpart (QImode, scratch)));
15146 }
15147 }
15148
15149 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15150 (padded to the size of a word) should be passed in a register. */
15151
15152 static bool
15153 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15154 {
15155 if (TARGET_AAPCS_BASED)
15156 return must_pass_in_stack_var_size (mode, type);
15157 else
15158 return must_pass_in_stack_var_size_or_pad (mode, type);
15159 }
15160
15161
15162 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15163 byte of a stack argument has useful data. For legacy APCS ABIs we use
15164 the default. For AAPCS based ABIs small aggregate types are placed
15165 in the lowest memory address. */
15166
15167 static pad_direction
15168 arm_function_arg_padding (machine_mode mode, const_tree type)
15169 {
15170 if (!TARGET_AAPCS_BASED)
15171 return default_function_arg_padding (mode, type);
15172
15173 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15174 return PAD_DOWNWARD;
15175
15176 return PAD_UPWARD;
15177 }
15178
15179
15180 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15181 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15182 register has useful data, and return the opposite if the most
15183 significant byte does. */
15184
15185 bool
15186 arm_pad_reg_upward (machine_mode mode,
15187 tree type, int first ATTRIBUTE_UNUSED)
15188 {
15189 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15190 {
15191 /* For AAPCS, small aggregates, small fixed-point types,
15192 and small complex types are always padded upwards. */
15193 if (type)
15194 {
15195 if ((AGGREGATE_TYPE_P (type)
15196 || TREE_CODE (type) == COMPLEX_TYPE
15197 || FIXED_POINT_TYPE_P (type))
15198 && int_size_in_bytes (type) <= 4)
15199 return true;
15200 }
15201 else
15202 {
15203 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15204 && GET_MODE_SIZE (mode) <= 4)
15205 return true;
15206 }
15207 }
15208
15209 /* Otherwise, use default padding. */
15210 return !BYTES_BIG_ENDIAN;
15211 }
15212
15213 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15214 assuming that the address in the base register is word aligned. */
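/* For example, in Thumb-2 an offset of 1020 is accepted but 1022 (not a
   multiple of four) and 1024 (out of range) are not; ARM mode accepts
   offsets in the range -255..255.  */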
15215 bool
15216 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15217 {
15218 HOST_WIDE_INT max_offset;
15219
15220 /* Offset must be a multiple of 4 in Thumb-2 mode.  */
15221 if (TARGET_THUMB2 && ((offset & 3) != 0))
15222 return false;
15223
15224 if (TARGET_THUMB2)
15225 max_offset = 1020;
15226 else if (TARGET_ARM)
15227 max_offset = 255;
15228 else
15229 return false;
15230
15231 return ((offset <= max_offset) && (offset >= -max_offset));
15232 }
15233
15234 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15235 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15236 Assumes that the address in the base register RN is word aligned. Pattern
15237 guarantees that both memory accesses use the same base register,
15238 the offsets are constants within the range, and the gap between the offsets is 4.
15239 If reload is complete, check that the registers are legal.  WBACK indicates
15240 whether the address is updated.  LOAD indicates whether the memory access is a load or a store.  */
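/* For example, in ARM state the destination must be an even/odd register
   pair such as r4/r5 (r5/r4 or r4/r6 are rejected), while in Thumb-2 state
   SP and PC must not be used as destination registers.  */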
15241 bool
15242 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15243 bool wback, bool load)
15244 {
15245 unsigned int t, t2, n;
15246
15247 if (!reload_completed)
15248 return true;
15249
15250 if (!offset_ok_for_ldrd_strd (offset))
15251 return false;
15252
15253 t = REGNO (rt);
15254 t2 = REGNO (rt2);
15255 n = REGNO (rn);
15256
15257 if ((TARGET_THUMB2)
15258 && ((wback && (n == t || n == t2))
15259 || (t == SP_REGNUM)
15260 || (t == PC_REGNUM)
15261 || (t2 == SP_REGNUM)
15262 || (t2 == PC_REGNUM)
15263 || (!load && (n == PC_REGNUM))
15264 || (load && (t == t2))
15265 /* Triggers Cortex-M3 LDRD errata. */
15266 || (!wback && load && fix_cm3_ldrd && (n == t))))
15267 return false;
15268
15269 if ((TARGET_ARM)
15270 && ((wback && (n == t || n == t2))
15271 || (t2 == PC_REGNUM)
15272 || (t % 2 != 0) /* First destination register is not even. */
15273 || (t2 != t + 1)
15274 /* PC can be used as base register (for offset addressing only),
15275 but it is deprecated.  */
15276 || (n == PC_REGNUM)))
15277 return false;
15278
15279 return true;
15280 }
15281
15282 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15283 operand MEM's address contains an immediate offset from the base
15284 register and has no side effects, in which case it sets BASE and
15285 OFFSET accordingly. */
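/* For example, (mem (plus (reg r1) (const_int 8))) yields BASE = r1 and
   OFFSET = 8, whereas an auto-modified address such as (mem (post_inc ...))
   is rejected because it has side effects.  */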
15286 static bool
15287 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15288 {
15289 rtx addr;
15290
15291 gcc_assert (base != NULL && offset != NULL);
15292
15293 /* TODO: Handle more general memory operand patterns, such as
15294 PRE_DEC and PRE_INC. */
15295
15296 if (side_effects_p (mem))
15297 return false;
15298
15299 /* Can't deal with subregs. */
15300 if (GET_CODE (mem) == SUBREG)
15301 return false;
15302
15303 gcc_assert (MEM_P (mem));
15304
15305 *offset = const0_rtx;
15306
15307 addr = XEXP (mem, 0);
15308
15309 /* If addr isn't valid for DImode, then we can't handle it. */
15310 if (!arm_legitimate_address_p (DImode, addr,
15311 reload_in_progress || reload_completed))
15312 return false;
15313
15314 if (REG_P (addr))
15315 {
15316 *base = addr;
15317 return true;
15318 }
15319 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15320 {
15321 *base = XEXP (addr, 0);
15322 *offset = XEXP (addr, 1);
15323 return (REG_P (*base) && CONST_INT_P (*offset));
15324 }
15325
15326 return false;
15327 }
15328
15329 /* Called from a peephole2 to replace two word-size accesses with a
15330 single LDRD/STRD instruction. Returns true iff we can generate a
15331 new instruction sequence. That is, both accesses use the same base
15332 register and the gap between constant offsets is 4. This function
15333 may reorder its operands to match ldrd/strd RTL templates.
15334 OPERANDS are the operands found by the peephole matcher;
15335 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15336 corresponding memory operands.  LOAD indicates whether the access
15337 is a load or a store.  CONST_STORE indicates a store of constant
15338 integer values held in OPERANDS[4,5], and assumes that the pattern
15339 is four insns long, for the purpose of checking dead registers.
15340 COMMUTE indicates that register operands may be reordered. */
15341 bool
15342 gen_operands_ldrd_strd (rtx *operands, bool load,
15343 bool const_store, bool commute)
15344 {
15345 int nops = 2;
15346 HOST_WIDE_INT offsets[2], offset;
15347 rtx base = NULL_RTX;
15348 rtx cur_base, cur_offset, tmp;
15349 int i, gap;
15350 HARD_REG_SET regset;
15351
15352 gcc_assert (!const_store || !load);
15353 /* Check that the memory references are immediate offsets from the
15354 same base register. Extract the base register, the destination
15355 registers, and the corresponding memory offsets. */
15356 for (i = 0; i < nops; i++)
15357 {
15358 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15359 return false;
15360
15361 if (i == 0)
15362 base = cur_base;
15363 else if (REGNO (base) != REGNO (cur_base))
15364 return false;
15365
15366 offsets[i] = INTVAL (cur_offset);
15367 if (GET_CODE (operands[i]) == SUBREG)
15368 {
15369 tmp = SUBREG_REG (operands[i]);
15370 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15371 operands[i] = tmp;
15372 }
15373 }
15374
15375 /* Make sure there is no dependency between the individual loads. */
15376 if (load && REGNO (operands[0]) == REGNO (base))
15377 return false; /* RAW */
15378
15379 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15380 return false; /* WAW */
15381
15382 /* If the same input register is used in both stores
15383 when storing different constants, try to find a free register.
15384 For example, the code
15385 mov r0, 0
15386 str r0, [r2]
15387 mov r0, 1
15388 str r0, [r2, #4]
15389 can be transformed into
15390 mov r1, 0
15391 mov r0, 1
15392 strd r1, r0, [r2]
15393 in Thumb mode assuming that r1 is free.
15394 For ARM mode do the same but only if the starting register
15395 can be made to be even. */
15396 if (const_store
15397 && REGNO (operands[0]) == REGNO (operands[1])
15398 && INTVAL (operands[4]) != INTVAL (operands[5]))
15399 {
15400 if (TARGET_THUMB2)
15401 {
15402 CLEAR_HARD_REG_SET (regset);
15403 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15404 if (tmp == NULL_RTX)
15405 return false;
15406
15407 /* Use the new register in the first load to ensure that
15408 if the original input register is not dead after peephole,
15409 then it will have the correct constant value. */
15410 operands[0] = tmp;
15411 }
15412 else if (TARGET_ARM)
15413 {
15414 int regno = REGNO (operands[0]);
15415 if (!peep2_reg_dead_p (4, operands[0]))
15416 {
15417 /* When the input register is even and is not dead after the
15418 pattern, it has to hold the second constant but we cannot
15419 form a legal STRD in ARM mode with this register as the second
15420 register. */
15421 if (regno % 2 == 0)
15422 return false;
15423
15424 /* Is regno-1 free? */
15425 SET_HARD_REG_SET (regset);
15426 CLEAR_HARD_REG_BIT(regset, regno - 1);
15427 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15428 if (tmp == NULL_RTX)
15429 return false;
15430
15431 operands[0] = tmp;
15432 }
15433 else
15434 {
15435 /* Find a DImode register. */
15436 CLEAR_HARD_REG_SET (regset);
15437 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15438 if (tmp != NULL_RTX)
15439 {
15440 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15441 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15442 }
15443 else
15444 {
15445 /* Can we use the input register to form a DI register? */
15446 SET_HARD_REG_SET (regset);
15447 CLEAR_HARD_REG_BIT(regset,
15448 regno % 2 == 0 ? regno + 1 : regno - 1);
15449 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15450 if (tmp == NULL_RTX)
15451 return false;
15452 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15453 }
15454 }
15455
15456 gcc_assert (operands[0] != NULL_RTX);
15457 gcc_assert (operands[1] != NULL_RTX);
15458 gcc_assert (REGNO (operands[0]) % 2 == 0);
15459 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15460 }
15461 }
15462
15463 /* Make sure the instructions are ordered with lower memory access first. */
15464 if (offsets[0] > offsets[1])
15465 {
15466 gap = offsets[0] - offsets[1];
15467 offset = offsets[1];
15468
15469 /* Swap the instructions such that lower memory is accessed first. */
15470 std::swap (operands[0], operands[1]);
15471 std::swap (operands[2], operands[3]);
15472 if (const_store)
15473 std::swap (operands[4], operands[5]);
15474 }
15475 else
15476 {
15477 gap = offsets[1] - offsets[0];
15478 offset = offsets[0];
15479 }
15480
15481 /* Make sure accesses are to consecutive memory locations. */
15482 if (gap != 4)
15483 return false;
15484
15485 /* Make sure we generate legal instructions. */
15486 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15487 false, load))
15488 return true;
15489
15490 /* In Thumb state, where registers are almost unconstrained, there
15491 is little hope of fixing it up.  */
15492 if (TARGET_THUMB2)
15493 return false;
15494
15495 if (load && commute)
15496 {
15497 /* Try reordering registers. */
15498 std::swap (operands[0], operands[1]);
15499 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15500 false, load))
15501 return true;
15502 }
15503
15504 if (const_store)
15505 {
15506 /* If input registers are dead after this pattern, they can be
15507 reordered or replaced by other registers that are free in the
15508 current pattern. */
15509 if (!peep2_reg_dead_p (4, operands[0])
15510 || !peep2_reg_dead_p (4, operands[1]))
15511 return false;
15512
15513 /* Try to reorder the input registers. */
15514 /* For example, the code
15515 mov r0, 0
15516 mov r1, 1
15517 str r1, [r2]
15518 str r0, [r2, #4]
15519 can be transformed into
15520 mov r1, 0
15521 mov r0, 1
15522 strd r0, [r2]
15523 */
15524 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15525 false, false))
15526 {
15527 std::swap (operands[0], operands[1]);
15528 return true;
15529 }
15530
15531 /* Try to find a free DI register. */
15532 CLEAR_HARD_REG_SET (regset);
15533 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15534 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15535 while (true)
15536 {
15537 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15538 if (tmp == NULL_RTX)
15539 return false;
15540
15541 /* DREG must be an even-numbered register in DImode.
15542 Split it into SI registers. */
15543 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15544 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15545 gcc_assert (operands[0] != NULL_RTX);
15546 gcc_assert (operands[1] != NULL_RTX);
15547 gcc_assert (REGNO (operands[0]) % 2 == 0);
15548 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15549
15550 return (operands_ok_ldrd_strd (operands[0], operands[1],
15551 base, offset,
15552 false, load));
15553 }
15554 }
15555
15556 return false;
15557 }
15558
15559
15560
15561 \f
15562 /* Print a symbolic form of X to the debug file, F. */
15563 static void
15564 arm_print_value (FILE *f, rtx x)
15565 {
15566 switch (GET_CODE (x))
15567 {
15568 case CONST_INT:
15569 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15570 return;
15571
15572 case CONST_DOUBLE:
15573 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15574 return;
15575
15576 case CONST_VECTOR:
15577 {
15578 int i;
15579
15580 fprintf (f, "<");
15581 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15582 {
15583 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15584 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15585 fputc (',', f);
15586 }
15587 fprintf (f, ">");
15588 }
15589 return;
15590
15591 case CONST_STRING:
15592 fprintf (f, "\"%s\"", XSTR (x, 0));
15593 return;
15594
15595 case SYMBOL_REF:
15596 fprintf (f, "`%s'", XSTR (x, 0));
15597 return;
15598
15599 case LABEL_REF:
15600 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15601 return;
15602
15603 case CONST:
15604 arm_print_value (f, XEXP (x, 0));
15605 return;
15606
15607 case PLUS:
15608 arm_print_value (f, XEXP (x, 0));
15609 fprintf (f, "+");
15610 arm_print_value (f, XEXP (x, 1));
15611 return;
15612
15613 case PC:
15614 fprintf (f, "pc");
15615 return;
15616
15617 default:
15618 fprintf (f, "????");
15619 return;
15620 }
15621 }
15622 \f
15623 /* Routines for manipulation of the constant pool. */
15624
15625 /* Arm instructions cannot load a large constant directly into a
15626 register; they have to come from a pc relative load. The constant
15627 must therefore be placed in the addressable range of the pc
15628 relative load. Depending on the precise pc relative load
15629 instruction the range is somewhere between 256 bytes and 4k. This
15630 means that we often have to dump a constant inside a function, and
15631 generate code to branch around it.
15632
15633 It is important to minimize this, since the branches will slow
15634 things down and make the code larger.
15635
15636 Normally we can hide the table after an existing unconditional
15637 branch so that there is no interruption of the flow, but in the
15638 worst case the code looks like this:
15639
15640 ldr rn, L1
15641 ...
15642 b L2
15643 align
15644 L1: .long value
15645 L2:
15646 ...
15647
15648 ldr rn, L3
15649 ...
15650 b L4
15651 align
15652 L3: .long value
15653 L4:
15654 ...
15655
15656 We fix this by performing a scan after scheduling, which notices
15657 which instructions need to have their operands fetched from the
15658 constant table and builds the table.
15659
15660 The algorithm starts by building a table of all the constants that
15661 need fixing up and all the natural barriers in the function (places
15662 where a constant table can be dropped without breaking the flow).
15663 For each fixup we note how far the pc-relative replacement will be
15664 able to reach and the offset of the instruction into the function.
15665
15666 Having built the table we then group the fixes together to form
15667 tables that are as large as possible (subject to addressing
15668 constraints) and emit each table of constants after the last
15669 barrier that is within range of all the instructions in the group.
15670 If a group does not contain a barrier, then we forcibly create one
15671 by inserting a jump instruction into the flow. Once the table has
15672 been inserted, the insns are then modified to reference the
15673 relevant entry in the pool.
15674
15675 Possible enhancements to the algorithm (not implemented) are:
15676
15677 1) For some processors and object formats, there may be benefit in
15678 aligning the pools to the start of cache lines; this alignment
15679 would need to be taken into account when calculating addressability
15680 of a pool. */
15681
15682 /* These typedefs are located at the start of this file, so that
15683 they can be used in the prototypes there. This comment is to
15684 remind readers of that fact so that the following structures
15685 can be understood more easily.
15686
15687 typedef struct minipool_node Mnode;
15688 typedef struct minipool_fixup Mfix; */
15689
15690 struct minipool_node
15691 {
15692 /* Doubly linked chain of entries. */
15693 Mnode * next;
15694 Mnode * prev;
15695 /* The maximum offset into the code at which this entry can be placed.  While
15696 pushing fixes for forward references, all entries are sorted in order
15697 of increasing max_address. */
15698 HOST_WIDE_INT max_address;
15699 /* Similarly for an entry inserted for a backwards ref. */
15700 HOST_WIDE_INT min_address;
15701 /* The number of fixes referencing this entry. This can become zero
15702 if we "unpush" an entry. In this case we ignore the entry when we
15703 come to emit the code. */
15704 int refcount;
15705 /* The offset from the start of the minipool. */
15706 HOST_WIDE_INT offset;
15707 /* The value in table. */
15708 rtx value;
15709 /* The mode of value. */
15710 machine_mode mode;
15711 /* The size of the value. With iWMMXt enabled
15712 sizes > 4 also imply an alignment of 8-bytes. */
15713 int fix_size;
15714 };
15715
15716 struct minipool_fixup
15717 {
15718 Mfix * next;
15719 rtx_insn * insn;
15720 HOST_WIDE_INT address;
15721 rtx * loc;
15722 machine_mode mode;
15723 int fix_size;
15724 rtx value;
15725 Mnode * minipool;
15726 HOST_WIDE_INT forwards;
15727 HOST_WIDE_INT backwards;
15728 };
15729
15730 /* Fixes less than a word need padding out to a word boundary. */
15731 #define MINIPOOL_FIX_SIZE(mode) \
15732 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15733
15734 static Mnode * minipool_vector_head;
15735 static Mnode * minipool_vector_tail;
15736 static rtx_code_label *minipool_vector_label;
15737 static int minipool_pad;
15738
15739 /* The linked list of all minipool fixes required for this function. */
15740 Mfix * minipool_fix_head;
15741 Mfix * minipool_fix_tail;
15742 /* The fix entry for the current minipool, once it has been placed. */
15743 Mfix * minipool_barrier;
15744
15745 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15746 #define JUMP_TABLES_IN_TEXT_SECTION 0
15747 #endif
15748
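/* Return the number of bytes of code that the jump table INSN occupies in
   the text section, or zero when jump tables are placed in a separate
   read-only data section.  For example, a TBB table (one byte per entry)
   with five entries is padded to six bytes so that the following code stays
   halfword aligned.  */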
15749 static HOST_WIDE_INT
15750 get_jump_table_size (rtx_jump_table_data *insn)
15751 {
15752 /* ADDR_VECs only take room if read-only data goes into the text
15753 section. */
15754 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15755 {
15756 rtx body = PATTERN (insn);
15757 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15758 HOST_WIDE_INT size;
15759 HOST_WIDE_INT modesize;
15760
15761 modesize = GET_MODE_SIZE (GET_MODE (body));
15762 size = modesize * XVECLEN (body, elt);
15763 switch (modesize)
15764 {
15765 case 1:
15766 /* Round up size of TBB table to a halfword boundary. */
15767 size = (size + 1) & ~HOST_WIDE_INT_1;
15768 break;
15769 case 2:
15770 /* No padding necessary for TBH. */
15771 break;
15772 case 4:
15773 /* Add two bytes for alignment on Thumb. */
15774 if (TARGET_THUMB)
15775 size += 2;
15776 break;
15777 default:
15778 gcc_unreachable ();
15779 }
15780 return size;
15781 }
15782
15783 return 0;
15784 }
15785
15786 /* Return the maximum amount of padding that will be inserted before
15787 label LABEL. */
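/* For example, a label aligned to an 8-byte boundary in Thumb code may be
   preceded by up to 8 - 2 = 6 bytes of padding.  */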
15788
15789 static HOST_WIDE_INT
15790 get_label_padding (rtx label)
15791 {
15792 HOST_WIDE_INT align, min_insn_size;
15793
15794 align = 1 << label_to_alignment (label);
15795 min_insn_size = TARGET_THUMB ? 2 : 4;
15796 return align > min_insn_size ? align - min_insn_size : 0;
15797 }
15798
15799 /* Move a minipool fix MP from its current location to before MAX_MP.
15800 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15801 constraints may need updating. */
15802 static Mnode *
15803 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15804 HOST_WIDE_INT max_address)
15805 {
15806 /* The code below assumes these are different. */
15807 gcc_assert (mp != max_mp);
15808
15809 if (max_mp == NULL)
15810 {
15811 if (max_address < mp->max_address)
15812 mp->max_address = max_address;
15813 }
15814 else
15815 {
15816 if (max_address > max_mp->max_address - mp->fix_size)
15817 mp->max_address = max_mp->max_address - mp->fix_size;
15818 else
15819 mp->max_address = max_address;
15820
15821 /* Unlink MP from its current position. Since max_mp is non-null,
15822 mp->prev must be non-null. */
15823 mp->prev->next = mp->next;
15824 if (mp->next != NULL)
15825 mp->next->prev = mp->prev;
15826 else
15827 minipool_vector_tail = mp->prev;
15828
15829 /* Re-insert it before MAX_MP. */
15830 mp->next = max_mp;
15831 mp->prev = max_mp->prev;
15832 max_mp->prev = mp;
15833
15834 if (mp->prev != NULL)
15835 mp->prev->next = mp;
15836 else
15837 minipool_vector_head = mp;
15838 }
15839
15840 /* Save the new entry. */
15841 max_mp = mp;
15842
15843 /* Scan over the preceding entries and adjust their addresses as
15844 required. */
15845 while (mp->prev != NULL
15846 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15847 {
15848 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15849 mp = mp->prev;
15850 }
15851
15852 return max_mp;
15853 }
15854
15855 /* Add a constant to the minipool for a forward reference. Returns the
15856 node added or NULL if the constant will not fit in this pool. */
15857 static Mnode *
15858 add_minipool_forward_ref (Mfix *fix)
15859 {
15860 /* If set, max_mp is the first pool_entry that has a lower
15861 constraint than the one we are trying to add. */
15862 Mnode * max_mp = NULL;
15863 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15864 Mnode * mp;
15865
15866 /* If the minipool starts before the end of FIX->INSN then this FIX
15867 cannot be placed into the current pool.  Furthermore, adding the
15868 new constant pool entry may cause the pool to start FIX_SIZE bytes
15869 earlier. */
15870 if (minipool_vector_head &&
15871 (fix->address + get_attr_length (fix->insn)
15872 >= minipool_vector_head->max_address - fix->fix_size))
15873 return NULL;
15874
15875 /* Scan the pool to see if a constant with the same value has
15876 already been added. While we are doing this, also note the
15877 location where we must insert the constant if it doesn't already
15878 exist. */
15879 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15880 {
15881 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15882 && fix->mode == mp->mode
15883 && (!LABEL_P (fix->value)
15884 || (CODE_LABEL_NUMBER (fix->value)
15885 == CODE_LABEL_NUMBER (mp->value)))
15886 && rtx_equal_p (fix->value, mp->value))
15887 {
15888 /* More than one fix references this entry. */
15889 mp->refcount++;
15890 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15891 }
15892
15893 /* Note the insertion point if necessary. */
15894 if (max_mp == NULL
15895 && mp->max_address > max_address)
15896 max_mp = mp;
15897
15898 /* If we are inserting an 8-byte aligned quantity and
15899 we have not already found an insertion point, then
15900 make sure that all such 8-byte aligned quantities are
15901 placed at the start of the pool. */
15902 if (ARM_DOUBLEWORD_ALIGN
15903 && max_mp == NULL
15904 && fix->fix_size >= 8
15905 && mp->fix_size < 8)
15906 {
15907 max_mp = mp;
15908 max_address = mp->max_address;
15909 }
15910 }
15911
15912 /* The value is not currently in the minipool, so we need to create
15913 a new entry for it. If MAX_MP is NULL, the entry will be put on
15914 the end of the list since the placement is less constrained than
15915 any existing entry. Otherwise, we insert the new fix before
15916 MAX_MP and, if necessary, adjust the constraints on the other
15917 entries. */
15918 mp = XNEW (Mnode);
15919 mp->fix_size = fix->fix_size;
15920 mp->mode = fix->mode;
15921 mp->value = fix->value;
15922 mp->refcount = 1;
15923 /* Not yet required for a backwards ref. */
15924 mp->min_address = -65536;
15925
15926 if (max_mp == NULL)
15927 {
15928 mp->max_address = max_address;
15929 mp->next = NULL;
15930 mp->prev = minipool_vector_tail;
15931
15932 if (mp->prev == NULL)
15933 {
15934 minipool_vector_head = mp;
15935 minipool_vector_label = gen_label_rtx ();
15936 }
15937 else
15938 mp->prev->next = mp;
15939
15940 minipool_vector_tail = mp;
15941 }
15942 else
15943 {
15944 if (max_address > max_mp->max_address - mp->fix_size)
15945 mp->max_address = max_mp->max_address - mp->fix_size;
15946 else
15947 mp->max_address = max_address;
15948
15949 mp->next = max_mp;
15950 mp->prev = max_mp->prev;
15951 max_mp->prev = mp;
15952 if (mp->prev != NULL)
15953 mp->prev->next = mp;
15954 else
15955 minipool_vector_head = mp;
15956 }
15957
15958 /* Save the new entry. */
15959 max_mp = mp;
15960
15961 /* Scan over the preceding entries and adjust their addresses as
15962 required. */
15963 while (mp->prev != NULL
15964 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15965 {
15966 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15967 mp = mp->prev;
15968 }
15969
15970 return max_mp;
15971 }
15972
15973 static Mnode *
15974 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15975 HOST_WIDE_INT min_address)
15976 {
15977 HOST_WIDE_INT offset;
15978
15979 /* The code below assumes these are different. */
15980 gcc_assert (mp != min_mp);
15981
15982 if (min_mp == NULL)
15983 {
15984 if (min_address > mp->min_address)
15985 mp->min_address = min_address;
15986 }
15987 else
15988 {
15989 /* We will adjust this below if it is too loose. */
15990 mp->min_address = min_address;
15991
15992 /* Unlink MP from its current position. Since min_mp is non-null,
15993 mp->next must be non-null. */
15994 mp->next->prev = mp->prev;
15995 if (mp->prev != NULL)
15996 mp->prev->next = mp->next;
15997 else
15998 minipool_vector_head = mp->next;
15999
16000 /* Reinsert it after MIN_MP. */
16001 mp->prev = min_mp;
16002 mp->next = min_mp->next;
16003 min_mp->next = mp;
16004 if (mp->next != NULL)
16005 mp->next->prev = mp;
16006 else
16007 minipool_vector_tail = mp;
16008 }
16009
16010 min_mp = mp;
16011
16012 offset = 0;
16013 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16014 {
16015 mp->offset = offset;
16016 if (mp->refcount > 0)
16017 offset += mp->fix_size;
16018
16019 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16020 mp->next->min_address = mp->min_address + mp->fix_size;
16021 }
16022
16023 return min_mp;
16024 }
16025
16026 /* Add a constant to the minipool for a backward reference. Returns the
16027 node added or NULL if the constant will not fit in this pool.
16028
16029 Note that the code for inserting a backwards reference can be
16030 somewhat confusing because the calculated offsets for each fix do
16031 not take into account the size of the pool (which is still under
16032 construction).  */
16033 static Mnode *
16034 add_minipool_backward_ref (Mfix *fix)
16035 {
16036 /* If set, min_mp is the last pool_entry that has a lower constraint
16037 than the one we are trying to add. */
16038 Mnode *min_mp = NULL;
16039 /* This can be negative, since it is only a constraint. */
16040 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16041 Mnode *mp;
16042
16043 /* If we can't reach the current pool from this insn, or if we can't
16044 insert this entry at the end of the pool without pushing other
16045 fixes out of range, then we don't try. This ensures that we
16046 can't fail later on. */
16047 if (min_address >= minipool_barrier->address
16048 || (minipool_vector_tail->min_address + fix->fix_size
16049 >= minipool_barrier->address))
16050 return NULL;
16051
16052 /* Scan the pool to see if a constant with the same value has
16053 already been added. While we are doing this, also note the
16054 location where we must insert the constant if it doesn't already
16055 exist. */
16056 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16057 {
16058 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16059 && fix->mode == mp->mode
16060 && (!LABEL_P (fix->value)
16061 || (CODE_LABEL_NUMBER (fix->value)
16062 == CODE_LABEL_NUMBER (mp->value)))
16063 && rtx_equal_p (fix->value, mp->value)
16064 /* Check that there is enough slack to move this entry to the
16065 end of the table (this is conservative). */
16066 && (mp->max_address
16067 > (minipool_barrier->address
16068 + minipool_vector_tail->offset
16069 + minipool_vector_tail->fix_size)))
16070 {
16071 mp->refcount++;
16072 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16073 }
16074
16075 if (min_mp != NULL)
16076 mp->min_address += fix->fix_size;
16077 else
16078 {
16079 /* Note the insertion point if necessary. */
16080 if (mp->min_address < min_address)
16081 {
16082 /* For now, we do not allow the insertion of 8-byte alignment
16083 requiring nodes anywhere but at the start of the pool. */
16084 if (ARM_DOUBLEWORD_ALIGN
16085 && fix->fix_size >= 8 && mp->fix_size < 8)
16086 return NULL;
16087 else
16088 min_mp = mp;
16089 }
16090 else if (mp->max_address
16091 < minipool_barrier->address + mp->offset + fix->fix_size)
16092 {
16093 /* Inserting before this entry would push the fix beyond
16094 its maximum address (which can happen if we have
16095 re-located a forwards fix); force the new fix to come
16096 after it. */
16097 if (ARM_DOUBLEWORD_ALIGN
16098 && fix->fix_size >= 8 && mp->fix_size < 8)
16099 return NULL;
16100 else
16101 {
16102 min_mp = mp;
16103 min_address = mp->min_address + fix->fix_size;
16104 }
16105 }
16106 /* Do not insert a non-8-byte aligned quantity before 8-byte
16107 aligned quantities. */
16108 else if (ARM_DOUBLEWORD_ALIGN
16109 && fix->fix_size < 8
16110 && mp->fix_size >= 8)
16111 {
16112 min_mp = mp;
16113 min_address = mp->min_address + fix->fix_size;
16114 }
16115 }
16116 }
16117
16118 /* We need to create a new entry. */
16119 mp = XNEW (Mnode);
16120 mp->fix_size = fix->fix_size;
16121 mp->mode = fix->mode;
16122 mp->value = fix->value;
16123 mp->refcount = 1;
16124 mp->max_address = minipool_barrier->address + 65536;
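/* The maximum address is only loosely bounded here; for a backwards
reference the binding constraint is the minimum address set just below.  */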
16125
16126 mp->min_address = min_address;
16127
16128 if (min_mp == NULL)
16129 {
16130 mp->prev = NULL;
16131 mp->next = minipool_vector_head;
16132
16133 if (mp->next == NULL)
16134 {
16135 minipool_vector_tail = mp;
16136 minipool_vector_label = gen_label_rtx ();
16137 }
16138 else
16139 mp->next->prev = mp;
16140
16141 minipool_vector_head = mp;
16142 }
16143 else
16144 {
16145 mp->next = min_mp->next;
16146 mp->prev = min_mp;
16147 min_mp->next = mp;
16148
16149 if (mp->next != NULL)
16150 mp->next->prev = mp;
16151 else
16152 minipool_vector_tail = mp;
16153 }
16154
16155 /* Save the new entry. */
16156 min_mp = mp;
16157
16158 if (mp->prev)
16159 mp = mp->prev;
16160 else
16161 mp->offset = 0;
16162
16163 /* Scan over the following entries and adjust their offsets. */
16164 while (mp->next != NULL)
16165 {
16166 if (mp->next->min_address < mp->min_address + mp->fix_size)
16167 mp->next->min_address = mp->min_address + mp->fix_size;
16168
16169 if (mp->refcount)
16170 mp->next->offset = mp->offset + mp->fix_size;
16171 else
16172 mp->next->offset = mp->offset;
16173
16174 mp = mp->next;
16175 }
16176
16177 return min_mp;
16178 }
16179
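/* Record BARRIER as the barrier this pool will be emitted after and assign
a byte offset within the pool to every entry that is still referenced.  */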
16180 static void
16181 assign_minipool_offsets (Mfix *barrier)
16182 {
16183 HOST_WIDE_INT offset = 0;
16184 Mnode *mp;
16185
16186 minipool_barrier = barrier;
16187
16188 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16189 {
16190 mp->offset = offset;
16191
16192 if (mp->refcount > 0)
16193 offset += mp->fix_size;
16194 }
16195 }
16196
16197 /* Output the literal table.  */
16198 static void
16199 dump_minipool (rtx_insn *scan)
16200 {
16201 Mnode * mp;
16202 Mnode * nmp;
16203 int align64 = 0;
16204
16205 if (ARM_DOUBLEWORD_ALIGN)
16206 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16207 if (mp->refcount > 0 && mp->fix_size >= 8)
16208 {
16209 align64 = 1;
16210 break;
16211 }
16212
16213 if (dump_file)
16214 fprintf (dump_file,
16215 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16216 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16217
16218 scan = emit_label_after (gen_label_rtx (), scan);
16219 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16220 scan = emit_label_after (minipool_vector_label, scan);
16221
16222 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16223 {
16224 if (mp->refcount > 0)
16225 {
16226 if (dump_file)
16227 {
16228 fprintf (dump_file,
16229 ";; Offset %u, min %ld, max %ld ",
16230 (unsigned) mp->offset, (unsigned long) mp->min_address,
16231 (unsigned long) mp->max_address);
16232 arm_print_value (dump_file, mp->value);
16233 fputc ('\n', dump_file);
16234 }
16235
16236 rtx val = copy_rtx (mp->value);
16237
16238 switch (GET_MODE_SIZE (mp->mode))
16239 {
16240 #ifdef HAVE_consttable_1
16241 case 1:
16242 scan = emit_insn_after (gen_consttable_1 (val), scan);
16243 break;
16244
16245 #endif
16246 #ifdef HAVE_consttable_2
16247 case 2:
16248 scan = emit_insn_after (gen_consttable_2 (val), scan);
16249 break;
16250
16251 #endif
16252 #ifdef HAVE_consttable_4
16253 case 4:
16254 scan = emit_insn_after (gen_consttable_4 (val), scan);
16255 break;
16256
16257 #endif
16258 #ifdef HAVE_consttable_8
16259 case 8:
16260 scan = emit_insn_after (gen_consttable_8 (val), scan);
16261 break;
16262
16263 #endif
16264 #ifdef HAVE_consttable_16
16265 case 16:
16266 scan = emit_insn_after (gen_consttable_16 (val), scan);
16267 break;
16268
16269 #endif
16270 default:
16271 gcc_unreachable ();
16272 }
16273 }
16274
16275 nmp = mp->next;
16276 free (mp);
16277 }
16278
16279 minipool_vector_head = minipool_vector_tail = NULL;
16280 scan = emit_insn_after (gen_consttable_end (), scan);
16281 scan = emit_barrier_after (scan);
16282 }
16283
16284 /* Return the cost of forcibly inserting a barrier after INSN. */
16285 static int
16286 arm_barrier_cost (rtx_insn *insn)
16287 {
16288 /* Basing the location of the pool on the loop depth is preferable,
16289 but at the moment, the basic block information seems to be
16290 corrupted by this stage of the compilation.  */
16291 int base_cost = 50;
16292 rtx_insn *next = next_nonnote_insn (insn);
16293
16294 if (next != NULL && LABEL_P (next))
16295 base_cost -= 20;
16296
16297 switch (GET_CODE (insn))
16298 {
16299 case CODE_LABEL:
16300 /* It will always be better to place the table before the label, rather
16301 than after it. */
16302 return 50;
16303
16304 case INSN:
16305 case CALL_INSN:
16306 return base_cost;
16307
16308 case JUMP_INSN:
16309 return base_cost - 10;
16310
16311 default:
16312 return base_cost + 10;
16313 }
16314 }
16315
16316 /* Find the best place in the insn stream in the range
16317 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16318 Create the barrier by inserting a jump and add a new fix entry for
16319 it. */
16320 static Mfix *
16321 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16322 {
16323 HOST_WIDE_INT count = 0;
16324 rtx_barrier *barrier;
16325 rtx_insn *from = fix->insn;
16326 /* The instruction after which we will insert the jump. */
16327 rtx_insn *selected = NULL;
16328 int selected_cost;
16329 /* The address at which the jump instruction will be placed. */
16330 HOST_WIDE_INT selected_address;
16331 Mfix * new_fix;
16332 HOST_WIDE_INT max_count = max_address - fix->address;
16333 rtx_code_label *label = gen_label_rtx ();
16334
16335 selected_cost = arm_barrier_cost (from);
16336 selected_address = fix->address;
16337
16338 while (from && count < max_count)
16339 {
16340 rtx_jump_table_data *tmp;
16341 int new_cost;
16342
16343 /* This code shouldn't have been called if there was a natural barrier
16344 within range. */
16345 gcc_assert (!BARRIER_P (from));
16346
16347 /* Count the length of this insn. This must stay in sync with the
16348 code that pushes minipool fixes. */
16349 if (LABEL_P (from))
16350 count += get_label_padding (from);
16351 else
16352 count += get_attr_length (from);
16353
16354 /* If there is a jump table, add its length. */
16355 if (tablejump_p (from, NULL, &tmp))
16356 {
16357 count += get_jump_table_size (tmp);
16358
16359 /* Jump tables aren't in a basic block, so base the cost on
16360 the dispatch insn. If we select this location, we will
16361 still put the pool after the table. */
16362 new_cost = arm_barrier_cost (from);
16363
16364 if (count < max_count
16365 && (!selected || new_cost <= selected_cost))
16366 {
16367 selected = tmp;
16368 selected_cost = new_cost;
16369 selected_address = fix->address + count;
16370 }
16371
16372 /* Continue after the dispatch table. */
16373 from = NEXT_INSN (tmp);
16374 continue;
16375 }
16376
16377 new_cost = arm_barrier_cost (from);
16378
16379 if (count < max_count
16380 && (!selected || new_cost <= selected_cost))
16381 {
16382 selected = from;
16383 selected_cost = new_cost;
16384 selected_address = fix->address + count;
16385 }
16386
16387 from = NEXT_INSN (from);
16388 }
16389
16390 /* Make sure that we found a place to insert the jump. */
16391 gcc_assert (selected);
16392
16393 /* Make sure we do not split a call and its corresponding
16394 CALL_ARG_LOCATION note. */
16395 if (CALL_P (selected))
16396 {
16397 rtx_insn *next = NEXT_INSN (selected);
16398 if (next && NOTE_P (next)
16399 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16400 selected = next;
16401 }
16402
16403 /* Create a new JUMP_INSN that branches around a barrier. */
16404 from = emit_jump_insn_after (gen_jump (label), selected);
16405 JUMP_LABEL (from) = label;
16406 barrier = emit_barrier_after (from);
16407 emit_label_after (label, barrier);
16408
16409 /* Create a minipool barrier entry for the new barrier. */
16410 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16411 new_fix->insn = barrier;
16412 new_fix->address = selected_address;
16413 new_fix->next = fix->next;
16414 fix->next = new_fix;
16415
16416 return new_fix;
16417 }
16418
16419 /* Record that there is a natural barrier in the insn stream at
16420 ADDRESS. */
16421 static void
16422 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16423 {
16424 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16425
16426 fix->insn = insn;
16427 fix->address = address;
16428
16429 fix->next = NULL;
16430 if (minipool_fix_head != NULL)
16431 minipool_fix_tail->next = fix;
16432 else
16433 minipool_fix_head = fix;
16434
16435 minipool_fix_tail = fix;
16436 }
16437
16438 /* Record INSN, which will need fixing up to load a value from the
16439 minipool.  ADDRESS is the offset of the insn from the start of the
16440 function; LOC is a pointer to the part of the insn which requires
16441 fixing; VALUE is the constant that must be loaded, which is of type
16442 MODE. */
16443 static void
16444 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16445 machine_mode mode, rtx value)
16446 {
16447 gcc_assert (!arm_disable_literal_pool);
16448 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16449
16450 fix->insn = insn;
16451 fix->address = address;
16452 fix->loc = loc;
16453 fix->mode = mode;
16454 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16455 fix->value = value;
16456 fix->forwards = get_attr_pool_range (insn);
16457 fix->backwards = get_attr_neg_pool_range (insn);
16458 fix->minipool = NULL;
16459
16460 /* If an insn doesn't have a range defined for it, then it isn't
16461 expecting to be reworked by this code. Better to stop now than
16462 to generate duff assembly code. */
16463 gcc_assert (fix->forwards || fix->backwards);
16464
16465 /* If an entry requires 8-byte alignment then assume all constant pools
16466 require 4 bytes of padding. Trying to do this later on a per-pool
16467 basis is awkward because existing pool entries have to be modified. */
16468 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16469 minipool_pad = 4;
16470
16471 if (dump_file)
16472 {
16473 fprintf (dump_file,
16474 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16475 GET_MODE_NAME (mode),
16476 INSN_UID (insn), (unsigned long) address,
16477 -1 * (long)fix->backwards, (long)fix->forwards);
16478 arm_print_value (dump_file, fix->value);
16479 fprintf (dump_file, "\n");
16480 }
16481
16482 /* Add it to the chain of fixes. */
16483 fix->next = NULL;
16484
16485 if (minipool_fix_head != NULL)
16486 minipool_fix_tail->next = fix;
16487 else
16488 minipool_fix_head = fix;
16489
16490 minipool_fix_tail = fix;
16491 }
16492
16493 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
16494 constant inline, i.e. the largest number of insns we are prepared to
16495 use rather than placing the constant in the literal pool.  */
16496 int
16497 arm_max_const_double_inline_cost ()
16498 {
16499 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16500 }
16501
16502 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16503 Returns the number of insns needed, or 99 if we don't know how to
16504 do it. */
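/* For example, a DImode constant whose two 32-bit halves are both 42 costs
two insns: a single MOV immediate for each half.  */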
16505 int
16506 arm_const_double_inline_cost (rtx val)
16507 {
16508 rtx lowpart, highpart;
16509 machine_mode mode;
16510
16511 mode = GET_MODE (val);
16512
16513 if (mode == VOIDmode)
16514 mode = DImode;
16515
16516 gcc_assert (GET_MODE_SIZE (mode) == 8);
16517
16518 lowpart = gen_lowpart (SImode, val);
16519 highpart = gen_highpart_mode (SImode, mode, val);
16520
16521 gcc_assert (CONST_INT_P (lowpart));
16522 gcc_assert (CONST_INT_P (highpart));
16523
16524 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16525 NULL_RTX, NULL_RTX, 0, 0)
16526 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16527 NULL_RTX, NULL_RTX, 0, 0));
16528 }
16529
16530 /* Cost of loading a SImode constant. */
16531 static inline int
16532 arm_const_inline_cost (enum rtx_code code, rtx val)
16533 {
16534 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16535 NULL_RTX, NULL_RTX, 1, 0);
16536 }
16537
16538 /* Return true if it is worthwhile to split a 64-bit constant into two
16539 32-bit operations. This is the case if optimizing for size, or
16540 if we have load delay slots, or if one 32-bit part can be done with
16541 a single data operation. */
16542 bool
16543 arm_const_double_by_parts (rtx val)
16544 {
16545 machine_mode mode = GET_MODE (val);
16546 rtx part;
16547
16548 if (optimize_size || arm_ld_sched)
16549 return true;
16550
16551 if (mode == VOIDmode)
16552 mode = DImode;
16553
16554 part = gen_highpart_mode (SImode, mode, val);
16555
16556 gcc_assert (CONST_INT_P (part));
16557
16558 if (const_ok_for_arm (INTVAL (part))
16559 || const_ok_for_arm (~INTVAL (part)))
16560 return true;
16561
16562 part = gen_lowpart (SImode, val);
16563
16564 gcc_assert (CONST_INT_P (part));
16565
16566 if (const_ok_for_arm (INTVAL (part))
16567 || const_ok_for_arm (~INTVAL (part)))
16568 return true;
16569
16570 return false;
16571 }
16572
16573 /* Return true if it is possible to inline both the high and low parts
16574 of a 64-bit constant into 32-bit data processing instructions. */
16575 bool
16576 arm_const_double_by_immediates (rtx val)
16577 {
16578 machine_mode mode = GET_MODE (val);
16579 rtx part;
16580
16581 if (mode == VOIDmode)
16582 mode = DImode;
16583
16584 part = gen_highpart_mode (SImode, mode, val);
16585
16586 gcc_assert (CONST_INT_P (part));
16587
16588 if (!const_ok_for_arm (INTVAL (part)))
16589 return false;
16590
16591 part = gen_lowpart (SImode, val);
16592
16593 gcc_assert (CONST_INT_P (part));
16594
16595 if (!const_ok_for_arm (INTVAL (part)))
16596 return false;
16597
16598 return true;
16599 }
16600
16601 /* Scan INSN and note any of its operands that need fixing.
16602 If DO_PUSHES is false, we do not actually push any of the fixups
16603 needed. */
16604 static void
16605 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16606 {
16607 int opno;
16608
16609 extract_constrain_insn (insn);
16610
16611 if (recog_data.n_alternatives == 0)
16612 return;
16613
16614 /* Fill in recog_op_alt with information about the constraints of
16615 this insn. */
16616 preprocess_constraints (insn);
16617
16618 const operand_alternative *op_alt = which_op_alt ();
16619 for (opno = 0; opno < recog_data.n_operands; opno++)
16620 {
16621 /* Things we need to fix can only occur in inputs. */
16622 if (recog_data.operand_type[opno] != OP_IN)
16623 continue;
16624
16625 /* If this alternative is a memory reference, then any mention
16626 of constants in this alternative is really to fool reload
16627 into allowing us to accept one there. We need to fix them up
16628 now so that we output the right code. */
16629 if (op_alt[opno].memory_ok)
16630 {
16631 rtx op = recog_data.operand[opno];
16632
16633 if (CONSTANT_P (op))
16634 {
16635 if (do_pushes)
16636 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16637 recog_data.operand_mode[opno], op);
16638 }
16639 else if (MEM_P (op)
16640 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16641 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16642 {
16643 if (do_pushes)
16644 {
16645 rtx cop = avoid_constant_pool_reference (op);
16646
16647 /* Casting the address of something to a mode narrower
16648 than a word can cause avoid_constant_pool_reference()
16649 to return the pool reference itself. That's no good to
16650 us here.  Let's just hope that we can use the
16651 constant pool value directly. */
16652 if (op == cop)
16653 cop = get_pool_constant (XEXP (op, 0));
16654
16655 push_minipool_fix (insn, address,
16656 recog_data.operand_loc[opno],
16657 recog_data.operand_mode[opno], cop);
16658 }
16659
16660 }
16661 }
16662 }
16663
16664 return;
16665 }
16666
16667 /* Compute the clear mask and PADDING_BITS_TO_CLEAR for structs and
16668 unions in the context of the ARMv8-M Security Extensions.  This is
16669 a helper for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16670 functions.  PADDING_BITS_TO_CLEAR points to the base of either one
16671 mask or four masks, depending on whether it is computed for a
16672 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call'
16673 argument respectively.  ARG_TYPE is the tree for the type of the
16674 argument or of a field within an argument.  REGNO points to the
16675 register in which the argument or field starts and is updated as
16676 fields are scanned.  STARTING_BIT is the bit at which the argument
16677 or field starts, and LAST_USED_BIT tracks the last bit used.  */
16678
16679 static unsigned HOST_WIDE_INT
16680 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16681 uint32_t * padding_bits_to_clear,
16682 unsigned starting_bit, int * last_used_bit)
16684 {
16685 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16686
16687 if (TREE_CODE (arg_type) == RECORD_TYPE)
16688 {
16689 unsigned current_bit = starting_bit;
16690 tree field;
16691 long int offset, size;
16692
16693
16694 field = TYPE_FIELDS (arg_type);
16695 while (field)
16696 {
16697 /* The offset within a structure is always an offset from
16698 the start of that structure.  Make sure we take that into
16699 account when calculating the register-based offset used here.  */
16700 offset = starting_bit;
16701 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16702 offset %= 32;
16703
16704 /* This is the actual size of the field, for bitfields this is the
16705 bitfield width and not the container size. */
16706 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16707
16708 if (*last_used_bit != offset)
16709 {
16710 if (offset < *last_used_bit)
16711 {
16712 /* This field's offset is before the 'last_used_bit', which
16713 means this field goes in the next register.  So we need to
16714 pad the rest of the current register and increase the
16715 register number. */
16716 uint32_t mask;
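/* The next two statements set every bit from *last_used_bit upwards;
e.g. with *last_used_bit == 24 the mask becomes 0xff000000, marking the
unused top byte of the current register as padding.  */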
16717 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16718 mask++;
16719
16720 padding_bits_to_clear[*regno] |= mask;
16721 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16722 (*regno)++;
16723 }
16724 else
16725 {
16726 /* Otherwise we pad the bits between the last field's end and
16727 the start of the new field. */
16728 uint32_t mask;
16729
16730 mask = ((uint32_t)-1) >> (32 - offset);
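/* This sets the bits in [*last_used_bit, offset); e.g. with
*last_used_bit == 8 and offset == 16 the mask is 0x0000ff00, the gap
between the end of the previous field and the start of this one.  */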
16731 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16732 padding_bits_to_clear[*regno] |= mask;
16733 }
16734 current_bit = offset;
16735 }
16736
16737 /* Calculate further padding bits for inner structs/unions too. */
16738 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16739 {
16740 *last_used_bit = current_bit;
16741 not_to_clear_reg_mask
16742 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16743 padding_bits_to_clear, offset,
16744 last_used_bit);
16745 }
16746 else
16747 {
16748 /* Update 'current_bit' with this field's size. If the
16749 'current_bit' lies in a subsequent register, update 'regno' and
16750 reset 'current_bit' to point to the current bit in that new
16751 register. */
16752 current_bit += size;
16753 while (current_bit >= 32)
16754 {
16755 current_bit -= 32;
16756 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16757 (*regno)++;
16758 }
16759 *last_used_bit = current_bit;
16760 }
16761
16762 field = TREE_CHAIN (field);
16763 }
16764 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16765 }
16766 else if (TREE_CODE (arg_type) == UNION_TYPE)
16767 {
16768 tree field, field_t;
16769 int i, regno_t, field_size;
16770 int max_reg = -1;
16771 int max_bit = -1;
16772 uint32_t mask;
16773 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16774 = {-1, -1, -1, -1};
16775
16776 /* To compute the padding bits in a union we only consider bits as
16777 padding bits if, for every field in the union, they are either
16778 padding bits or fall outside that field's size.  */
16779 field = TYPE_FIELDS (arg_type);
16780 while (field)
16781 {
16782 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16783 = {0U, 0U, 0U, 0U};
16784 int last_used_bit_t = *last_used_bit;
16785 regno_t = *regno;
16786 field_t = TREE_TYPE (field);
16787
16788 /* If the field's type is either a record or a union make sure to
16789 compute their padding bits too. */
16790 if (RECORD_OR_UNION_TYPE_P (field_t))
16791 not_to_clear_reg_mask
16792 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16793 &padding_bits_to_clear_t[0],
16794 starting_bit, &last_used_bit_t);
16795 else
16796 {
16797 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16798 regno_t = (field_size / 32) + *regno;
16799 last_used_bit_t = (starting_bit + field_size) % 32;
16800 }
16801
16802 for (i = *regno; i < regno_t; i++)
16803 {
16804 /* For all but the last register used by this field, only keep the
16805 padding bits that were padding bits in this field. */
16806 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16807 }
16808
16809 /* For the last register, keep all padding bits that were padding
16810 bits in this field and any padding bits that are still valid
16811 as padding bits but fall outside of this field's size. */
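/* MASK below has every bit from last_used_bit_t upwards set, i.e. the
bits beyond the end of this field within its final register.  */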
16812 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16813 padding_bits_to_clear_res[regno_t]
16814 &= padding_bits_to_clear_t[regno_t] | mask;
16815
16816 /* Update the maximum size of the fields in terms of registers used
16817 ('max_reg') and the 'last_used_bit' in said register. */
16818 if (max_reg < regno_t)
16819 {
16820 max_reg = regno_t;
16821 max_bit = last_used_bit_t;
16822 }
16823 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16824 max_bit = last_used_bit_t;
16825
16826 field = TREE_CHAIN (field);
16827 }
16828
16829 /* Update the current padding_bits_to_clear using the intersection of the
16830 padding bits of all the fields. */
16831 for (i = *regno; i < max_reg; i++)
16832 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16833
16834 /* Do not keep trailing padding bits; we do not know yet whether this
16835 is the end of the argument.  */
16836 mask = ((uint32_t) 1 << max_bit) - 1;
16837 padding_bits_to_clear[max_reg]
16838 |= padding_bits_to_clear_res[max_reg] & mask;
16839
16840 *regno = max_reg;
16841 *last_used_bit = max_bit;
16842 }
16843 else
16844 /* This function should only be used for structs and unions. */
16845 gcc_unreachable ();
16846
16847 return not_to_clear_reg_mask;
16848 }
16849
16850 /* In the context of ARMv8-M Security Extensions, this function is used for both
16851 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16852 registers are used when returning or passing arguments, which is then
16853 returned as a mask. It will also compute a mask to indicate padding/unused
16854 bits for each of these registers, and passes this through the
16855 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16856 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16857 the starting register used to pass this argument or return value is passed
16858 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16859 for struct and union types. */
16860
16861 static unsigned HOST_WIDE_INT
16862 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16863 uint32_t * padding_bits_to_clear)
16865 {
16866 int last_used_bit = 0;
16867 unsigned HOST_WIDE_INT not_to_clear_mask;
16868
16869 if (RECORD_OR_UNION_TYPE_P (arg_type))
16870 {
16871 not_to_clear_mask
16872 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16873 padding_bits_to_clear, 0,
16874 &last_used_bit);
16875
16876
16877 /* If 'last_used_bit' is not zero, we are still using part of the
16878 last 'regno'.  In that case we must clear its trailing bits.
16879 Otherwise regno is not used at all and should be marked to be
16880 cleared.  */
16881 if (last_used_bit != 0)
16882 padding_bits_to_clear[regno]
16883 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16884 else
16885 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16886 }
16887 else
16888 {
16889 not_to_clear_mask = 0;
16890 /* We are not dealing with structs nor unions. So these arguments may be
16891 passed in floating point registers too. In some cases a BLKmode is
16892 used when returning or passing arguments in multiple VFP registers. */
16893 if (GET_MODE (arg_rtx) == BLKmode)
16894 {
16895 int i, arg_regs;
16896 rtx reg;
16897
16898 /* This should really only occur when dealing with the hard-float
16899 ABI. */
16900 gcc_assert (TARGET_HARD_FLOAT_ABI);
16901
16902 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16903 {
16904 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16905 gcc_assert (REG_P (reg));
16906
16907 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16908
16909 /* If we are dealing with DF mode, make sure we don't
16910 clear either of the registers it addresses. */
16911 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16912 if (arg_regs > 1)
16913 {
16914 unsigned HOST_WIDE_INT mask;
16915 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16916 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16917 not_to_clear_mask |= mask;
16918 }
16919 }
16920 }
16921 else
16922 {
16923 /* Otherwise we can rely on the MODE to determine how many registers
16924 are being used by this argument. */
16925 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16926 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16927 if (arg_regs > 1)
16928 {
16929 unsigned HOST_WIDE_INT
16930 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16931 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16932 not_to_clear_mask |= mask;
16933 }
16934 }
16935 }
16936
16937 return not_to_clear_mask;
16938 }
16939
16940 /* Clears caller saved registers not used to pass arguments before a
16941 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16942 registers is done in __gnu_cmse_nonsecure_call libcall.
16943 See libgcc/config/arm/cmse_nonsecure_call.S. */
16944
16945 static void
16946 cmse_nonsecure_call_clear_caller_saved (void)
16947 {
16948 basic_block bb;
16949
16950 FOR_EACH_BB_FN (bb, cfun)
16951 {
16952 rtx_insn *insn;
16953
16954 FOR_BB_INSNS (bb, insn)
16955 {
16956 uint64_t to_clear_mask, float_mask;
16957 rtx_insn *seq;
16958 rtx pat, call, unspec, reg, cleared_reg, tmp;
16959 unsigned int regno, maxregno;
16960 rtx address;
16961 CUMULATIVE_ARGS args_so_far_v;
16962 cumulative_args_t args_so_far;
16963 tree arg_type, fntype;
16964 bool using_r4, first_param = true;
16965 function_args_iterator args_iter;
16966 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16967 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16968
16969 if (!NONDEBUG_INSN_P (insn))
16970 continue;
16971
16972 if (!CALL_P (insn))
16973 continue;
16974
16975 pat = PATTERN (insn);
16976 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16977 call = XVECEXP (pat, 0, 0);
16978
16979 /* Get the real call RTX if the insn sets a value, ie. returns. */
16980 if (GET_CODE (call) == SET)
16981 call = SET_SRC (call);
16982
16983 /* Check if it is a cmse_nonsecure_call. */
16984 unspec = XEXP (call, 0);
16985 if (GET_CODE (unspec) != UNSPEC
16986 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16987 continue;
16988
16989 /* Determine the caller-saved registers we need to clear. */
16990 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16991 maxregno = NUM_ARG_REGS - 1;
16992 /* Only look at the caller-saved floating point registers in case of
16993 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16994 lazy stores and loads, which clear both caller- and callee-saved
16995 registers. */
16996 if (TARGET_HARD_FLOAT_ABI)
16997 {
16998 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16999 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17000 to_clear_mask |= float_mask;
17001 maxregno = D7_VFP_REGNUM;
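/* float_mask now covers d0-d7 (s0-s15), i.e. the VFP registers used for
argument passing, which are caller-saved under the hard-float ABI.  */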
17002 }
17003
17004 /* Make sure the register used to hold the function address is not
17005 cleared. */
17006 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17007 gcc_assert (MEM_P (address));
17008 gcc_assert (REG_P (XEXP (address, 0)));
17009 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17010
17011 /* Set basic block of call insn so that df rescan is performed on
17012 insns inserted here. */
17013 set_block_for_insn (insn, bb);
17014 df_set_flags (DF_DEFER_INSN_RESCAN);
17015 start_sequence ();
17016
17017 /* Make sure the scheduler doesn't schedule other insns beyond
17018 here. */
17019 emit_insn (gen_blockage ());
17020
17021 /* Walk through all arguments and clear registers
17022 appropriately.  */
17023 fntype = TREE_TYPE (MEM_EXPR (address));
17024 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17025 NULL_TREE);
17026 args_so_far = pack_cumulative_args (&args_so_far_v);
17027 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17028 {
17029 rtx arg_rtx;
17030 machine_mode arg_mode = TYPE_MODE (arg_type);
17031
17032 if (VOID_TYPE_P (arg_type))
17033 continue;
17034
17035 if (!first_param)
17036 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17037 true);
17038
17039 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17040 true);
17041 gcc_assert (REG_P (arg_rtx));
17042 to_clear_mask
17043 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17044 REGNO (arg_rtx),
17045 padding_bits_to_clear_ptr);
17046
17047 first_param = false;
17048 }
17049
17050 /* Clear padding bits where needed. */
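/* The complement of each padding mask is built up in a scratch register
16 bits at a time (low half first, then the high half via a ZERO_EXTRACT
when it is nonzero) and then ANDed into the corresponding argument
register, zeroing the padding bits while leaving the argument bits
intact.  */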
17051 cleared_reg = XEXP (address, 0);
17052 reg = gen_rtx_REG (SImode, IP_REGNUM);
17053 using_r4 = false;
17054 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17055 {
17056 if (padding_bits_to_clear[regno] == 0)
17057 continue;
17058
17059 /* If this is a Thumb-1 target, copy the address of the function
17060 we are calling from 'r4' into 'ip' so that we can use r4 to
17061 clear the unused bits in the arguments.  */
17062 if (TARGET_THUMB1 && !using_r4)
17063 {
17064 using_r4 = true;
17065 reg = cleared_reg;
17066 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17067 reg);
17068 }
17069
17070 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17071 emit_move_insn (reg, tmp);
17072 /* Also fill the top half of the negated
17073 padding_bits_to_clear. */
17074 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17075 {
17076 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17077 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17078 GEN_INT (16),
17079 GEN_INT (16)),
17080 tmp));
17081 }
17082
17083 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17084 gen_rtx_REG (SImode, regno),
17085 reg));
17086
17087 }
17088 if (using_r4)
17089 emit_move_insn (cleared_reg,
17090 gen_rtx_REG (SImode, IP_REGNUM));
17091
17092 /* We use right shift and left shift to clear the LSB of the address
17093 we jump to instead of using bic, to avoid having to use an extra
17094 register on Thumb-1. */
17095 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17096 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17097 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17098 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17099
17100 /* Clear all registers that might leak values before doing a
17101 non-secure call.  */
17102 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17103 {
17104 if (!(to_clear_mask & (1LL << regno)))
17105 continue;
17106
17107 /* If regno is an even vfp register and its successor is also to
17108 be cleared, use vmov. */
17109 if (IS_VFP_REGNUM (regno))
17110 {
17111 if (TARGET_VFP_DOUBLE
17112 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17113 && to_clear_mask & (1LL << (regno + 1)))
17114 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17115 CONST0_RTX (DFmode));
17116 else
17117 emit_move_insn (gen_rtx_REG (SFmode, regno),
17118 CONST0_RTX (SFmode));
17119 }
17120 else
17121 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17122 }
17123
17124 seq = get_insns ();
17125 end_sequence ();
17126 emit_insn_before (seq, insn);
17127
17128 }
17129 }
17130 }
17131
17132 /* Rewrite a move insn into a subtract of 0 if the condition codes
17133 will be useful in the next conditional jump insn.  */
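/* A sketch of the transformation: given

   (set (reg r1) (reg r0))                  ; the move
   ...
   cbranchsi4_insn comparing r0 (or r1) with 0

   the move is rewritten as

   (set (reg r1) (minus (reg r0) (const_int 0)))

   and the branch is updated to compare r1, so the flag-setting SUBS
   emitted for the subtract can feed the conditional branch.  */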
17134
17135 static void
17136 thumb1_reorg (void)
17137 {
17138 basic_block bb;
17139
17140 FOR_EACH_BB_FN (bb, cfun)
17141 {
17142 rtx dest, src;
17143 rtx cmp, op0, op1, set = NULL;
17144 rtx_insn *prev, *insn = BB_END (bb);
17145 bool insn_clobbered = false;
17146
17147 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17148 insn = PREV_INSN (insn);
17149
17150 /* Find the last cbranchsi4_insn in basic block BB. */
17151 if (insn == BB_HEAD (bb)
17152 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17153 continue;
17154
17155 /* Get the register with which we are comparing. */
17156 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17157 op0 = XEXP (cmp, 0);
17158 op1 = XEXP (cmp, 1);
17159
17160 /* Check that comparison is against ZERO. */
17161 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17162 continue;
17163
17164 /* Find the first flag setting insn before INSN in basic block BB. */
17165 gcc_assert (insn != BB_HEAD (bb));
17166 for (prev = PREV_INSN (insn);
17167 (!insn_clobbered
17168 && prev != BB_HEAD (bb)
17169 && (NOTE_P (prev)
17170 || DEBUG_INSN_P (prev)
17171 || ((set = single_set (prev)) != NULL
17172 && get_attr_conds (prev) == CONDS_NOCOND)));
17173 prev = PREV_INSN (prev))
17174 {
17175 if (reg_set_p (op0, prev))
17176 insn_clobbered = true;
17177 }
17178
17179 /* Skip if op0 is clobbered by insn other than prev. */
17180 if (insn_clobbered)
17181 continue;
17182
17183 if (!set)
17184 continue;
17185
17186 dest = SET_DEST (set);
17187 src = SET_SRC (set);
17188 if (!low_register_operand (dest, SImode)
17189 || !low_register_operand (src, SImode))
17190 continue;
17191
17192 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17193 in INSN. Both src and dest of the move insn are checked. */
17194 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17195 {
17196 dest = copy_rtx (dest);
17197 src = copy_rtx (src);
17198 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17199 PATTERN (prev) = gen_rtx_SET (dest, src);
17200 INSN_CODE (prev) = -1;
17201 /* Set test register in INSN to dest. */
17202 XEXP (cmp, 0) = copy_rtx (dest);
17203 INSN_CODE (insn) = -1;
17204 }
17205 }
17206 }
17207
17208 /* Convert instructions to their cc-clobbering variant if possible, since
17209 that allows us to use smaller encodings. */
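/* For example, a plain "add r0, r1, r2" on low registers needs a 32-bit
Thumb-2 encoding, whereas the flag-setting "adds r0, r1, r2" has a 16-bit
encoding; when the condition codes are dead at that point this pass
rewrites the insn pattern to include a CC clobber so the shorter form can
be chosen.  */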
17210
17211 static void
17212 thumb2_reorg (void)
17213 {
17214 basic_block bb;
17215 regset_head live;
17216
17217 INIT_REG_SET (&live);
17218
17219 /* We are freeing block_for_insn in the toplev to keep compatibility
17220 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17221 compute_bb_for_insn ();
17222 df_analyze ();
17223
17224 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17225
17226 FOR_EACH_BB_FN (bb, cfun)
17227 {
17228 if ((current_tune->disparage_flag_setting_t16_encodings
17229 == tune_params::DISPARAGE_FLAGS_ALL)
17230 && optimize_bb_for_speed_p (bb))
17231 continue;
17232
17233 rtx_insn *insn;
17234 Convert_Action action = SKIP;
17235 Convert_Action action_for_partial_flag_setting
17236 = ((current_tune->disparage_flag_setting_t16_encodings
17237 != tune_params::DISPARAGE_FLAGS_NEITHER)
17238 && optimize_bb_for_speed_p (bb))
17239 ? SKIP : CONV;
17240
17241 COPY_REG_SET (&live, DF_LR_OUT (bb));
17242 df_simulate_initialize_backwards (bb, &live);
17243 FOR_BB_INSNS_REVERSE (bb, insn)
17244 {
17245 if (NONJUMP_INSN_P (insn)
17246 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17247 && GET_CODE (PATTERN (insn)) == SET)
17248 {
17249 action = SKIP;
17250 rtx pat = PATTERN (insn);
17251 rtx dst = XEXP (pat, 0);
17252 rtx src = XEXP (pat, 1);
17253 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17254
17255 if (UNARY_P (src) || BINARY_P (src))
17256 op0 = XEXP (src, 0);
17257
17258 if (BINARY_P (src))
17259 op1 = XEXP (src, 1);
17260
17261 if (low_register_operand (dst, SImode))
17262 {
17263 switch (GET_CODE (src))
17264 {
17265 case PLUS:
17266 /* Adding two registers and storing the result
17267 in the first source is already a 16-bit
17268 operation. */
17269 if (rtx_equal_p (dst, op0)
17270 && register_operand (op1, SImode))
17271 break;
17272
17273 if (low_register_operand (op0, SImode))
17274 {
17275 /* ADDS <Rd>,<Rn>,<Rm> */
17276 if (low_register_operand (op1, SImode))
17277 action = CONV;
17278 /* ADDS <Rdn>,#<imm8> */
17279 /* SUBS <Rdn>,#<imm8> */
17280 else if (rtx_equal_p (dst, op0)
17281 && CONST_INT_P (op1)
17282 && IN_RANGE (INTVAL (op1), -255, 255))
17283 action = CONV;
17284 /* ADDS <Rd>,<Rn>,#<imm3> */
17285 /* SUBS <Rd>,<Rn>,#<imm3> */
17286 else if (CONST_INT_P (op1)
17287 && IN_RANGE (INTVAL (op1), -7, 7))
17288 action = CONV;
17289 }
17290 /* ADCS <Rd>, <Rn> */
17291 else if (GET_CODE (XEXP (src, 0)) == PLUS
17292 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17293 && low_register_operand (XEXP (XEXP (src, 0), 1),
17294 SImode)
17295 && COMPARISON_P (op1)
17296 && cc_register (XEXP (op1, 0), VOIDmode)
17297 && maybe_get_arm_condition_code (op1) == ARM_CS
17298 && XEXP (op1, 1) == const0_rtx)
17299 action = CONV;
17300 break;
17301
17302 case MINUS:
17303 /* RSBS <Rd>,<Rn>,#0
17304 Not handled here: see NEG below. */
17305 /* SUBS <Rd>,<Rn>,#<imm3>
17306 SUBS <Rdn>,#<imm8>
17307 Not handled here: see PLUS above. */
17308 /* SUBS <Rd>,<Rn>,<Rm> */
17309 if (low_register_operand (op0, SImode)
17310 && low_register_operand (op1, SImode))
17311 action = CONV;
17312 break;
17313
17314 case MULT:
17315 /* MULS <Rdm>,<Rn>,<Rdm>
17316 As an exception to the rule, this is only used
17317 when optimizing for size since MULS is slow on all
17318 known implementations. We do not even want to use
17319 MULS in cold code, if optimizing for speed, so we
17320 test the global flag here. */
17321 if (!optimize_size)
17322 break;
17323 /* Fall through. */
17324 case AND:
17325 case IOR:
17326 case XOR:
17327 /* ANDS <Rdn>,<Rm> */
17328 if (rtx_equal_p (dst, op0)
17329 && low_register_operand (op1, SImode))
17330 action = action_for_partial_flag_setting;
17331 else if (rtx_equal_p (dst, op1)
17332 && low_register_operand (op0, SImode))
17333 action = action_for_partial_flag_setting == SKIP
17334 ? SKIP : SWAP_CONV;
17335 break;
17336
17337 case ASHIFTRT:
17338 case ASHIFT:
17339 case LSHIFTRT:
17340 /* ASRS <Rdn>,<Rm> */
17341 /* LSRS <Rdn>,<Rm> */
17342 /* LSLS <Rdn>,<Rm> */
17343 if (rtx_equal_p (dst, op0)
17344 && low_register_operand (op1, SImode))
17345 action = action_for_partial_flag_setting;
17346 /* ASRS <Rd>,<Rm>,#<imm5> */
17347 /* LSRS <Rd>,<Rm>,#<imm5> */
17348 /* LSLS <Rd>,<Rm>,#<imm5> */
17349 else if (low_register_operand (op0, SImode)
17350 && CONST_INT_P (op1)
17351 && IN_RANGE (INTVAL (op1), 0, 31))
17352 action = action_for_partial_flag_setting;
17353 break;
17354
17355 case ROTATERT:
17356 /* RORS <Rdn>,<Rm> */
17357 if (rtx_equal_p (dst, op0)
17358 && low_register_operand (op1, SImode))
17359 action = action_for_partial_flag_setting;
17360 break;
17361
17362 case NOT:
17363 /* MVNS <Rd>,<Rm> */
17364 if (low_register_operand (op0, SImode))
17365 action = action_for_partial_flag_setting;
17366 break;
17367
17368 case NEG:
17369 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17370 if (low_register_operand (op0, SImode))
17371 action = CONV;
17372 break;
17373
17374 case CONST_INT:
17375 /* MOVS <Rd>,#<imm8> */
17376 if (CONST_INT_P (src)
17377 && IN_RANGE (INTVAL (src), 0, 255))
17378 action = action_for_partial_flag_setting;
17379 break;
17380
17381 case REG:
17382 /* MOVS and MOV<c> with registers have different
17383 encodings, so are not relevant here. */
17384 break;
17385
17386 default:
17387 break;
17388 }
17389 }
17390
17391 if (action != SKIP)
17392 {
17393 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17394 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17395 rtvec vec;
17396
17397 if (action == SWAP_CONV)
17398 {
17399 src = copy_rtx (src);
17400 XEXP (src, 0) = op1;
17401 XEXP (src, 1) = op0;
17402 pat = gen_rtx_SET (dst, src);
17403 vec = gen_rtvec (2, pat, clobber);
17404 }
17405 else /* action == CONV */
17406 vec = gen_rtvec (2, pat, clobber);
17407
17408 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17409 INSN_CODE (insn) = -1;
17410 }
17411 }
17412
17413 if (NONDEBUG_INSN_P (insn))
17414 df_simulate_one_insn_backwards (bb, insn, &live);
17415 }
17416 }
17417
17418 CLEAR_REG_SET (&live);
17419 }
17420
17421 /* GCC puts the pool in the wrong place for ARM, since we can only
17422 load addresses within a limited distance of the pc.  We do some
17423 special munging to move the constant pool values to the correct
17424 point in the code.  */
17425 static void
17426 arm_reorg (void)
17427 {
17428 rtx_insn *insn;
17429 HOST_WIDE_INT address = 0;
17430 Mfix * fix;
17431
17432 if (use_cmse)
17433 cmse_nonsecure_call_clear_caller_saved ();
17434 if (TARGET_THUMB1)
17435 thumb1_reorg ();
17436 else if (TARGET_THUMB2)
17437 thumb2_reorg ();
17438
17439 /* Ensure all insns that must be split have been split at this point.
17440 Otherwise, the pool placement code below may compute incorrect
17441 insn lengths. Note that when optimizing, all insns have already
17442 been split at this point. */
17443 if (!optimize)
17444 split_all_insns_noflow ();
17445
17446 /* Make sure we do not attempt to create a literal pool even though it should
17447 no longer be necessary to create any. */
17448 if (arm_disable_literal_pool)
17449 return;
17450
17451 minipool_fix_head = minipool_fix_tail = NULL;
17452
17453 /* The first insn must always be a note, or the code below won't
17454 scan it properly. */
17455 insn = get_insns ();
17456 gcc_assert (NOTE_P (insn));
17457 minipool_pad = 0;
17458
17459 /* Scan all the insns and record the operands that will need fixing. */
17460 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17461 {
17462 if (BARRIER_P (insn))
17463 push_minipool_barrier (insn, address);
17464 else if (INSN_P (insn))
17465 {
17466 rtx_jump_table_data *table;
17467
17468 note_invalid_constants (insn, address, true);
17469 address += get_attr_length (insn);
17470
17471 /* If the insn is a vector jump, add the size of the table
17472 and skip the table. */
17473 if (tablejump_p (insn, NULL, &table))
17474 {
17475 address += get_jump_table_size (table);
17476 insn = table;
17477 }
17478 }
17479 else if (LABEL_P (insn))
17480 /* Add the worst-case padding due to alignment. We don't add
17481 the _current_ padding because the minipool insertions
17482 themselves might change it. */
17483 address += get_label_padding (insn);
17484 }
17485
17486 fix = minipool_fix_head;
17487
17488 /* Now scan the fixups and perform the required changes. */
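/* Each iteration of this loop materializes one minipool: forward
references are accumulated until one no longer fits (or a natural barrier
is reached), a barrier is selected or created, offsets are assigned, any
backward references that also fit are added, the affected insns are
rewritten to load from the pool, and finally the pool itself is emitted
after the barrier.  */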
17489 while (fix)
17490 {
17491 Mfix * ftmp;
17492 Mfix * fdel;
17493 Mfix * last_added_fix;
17494 Mfix * last_barrier = NULL;
17495 Mfix * this_fix;
17496
17497 /* Skip any further barriers before the next fix. */
17498 while (fix && BARRIER_P (fix->insn))
17499 fix = fix->next;
17500
17501 /* No more fixes. */
17502 if (fix == NULL)
17503 break;
17504
17505 last_added_fix = NULL;
17506
17507 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17508 {
17509 if (BARRIER_P (ftmp->insn))
17510 {
17511 if (ftmp->address >= minipool_vector_head->max_address)
17512 break;
17513
17514 last_barrier = ftmp;
17515 }
17516 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17517 break;
17518
17519 last_added_fix = ftmp; /* Keep track of the last fix added. */
17520 }
17521
17522 /* If we found a barrier, drop back to that; any fixes that we
17523 could have reached but come after the barrier will now go in
17524 the next mini-pool. */
17525 if (last_barrier != NULL)
17526 {
17527 /* Reduce the refcount for those fixes that won't go into this
17528 pool after all. */
17529 for (fdel = last_barrier->next;
17530 fdel && fdel != ftmp;
17531 fdel = fdel->next)
17532 {
17533 fdel->minipool->refcount--;
17534 fdel->minipool = NULL;
17535 }
17536
17537 ftmp = last_barrier;
17538 }
17539 else
17540 {
17541 /* ftmp is the first fix that we can't fit into this pool and
17542 there are no natural barriers that we could use.  Insert a
17543 new barrier in the code somewhere between the previous
17544 fix and this one, and arrange to jump around it.  */
17545 HOST_WIDE_INT max_address;
17546
17547 /* The last item on the list of fixes must be a barrier, so
17548 we can never run off the end of the list of fixes without
17549 last_barrier being set. */
17550 gcc_assert (ftmp);
17551
17552 max_address = minipool_vector_head->max_address;
17553 /* Check that there isn't another fix that is in range that
17554 we couldn't fit into this pool because the pool was
17555 already too large: we need to put the pool before such an
17556 instruction. The pool itself may come just after the
17557 fix because create_fix_barrier also allows space for a
17558 jump instruction. */
17559 if (ftmp->address < max_address)
17560 max_address = ftmp->address + 1;
17561
17562 last_barrier = create_fix_barrier (last_added_fix, max_address);
17563 }
17564
17565 assign_minipool_offsets (last_barrier);
17566
17567 while (ftmp)
17568 {
17569 if (!BARRIER_P (ftmp->insn)
17570 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17571 == NULL))
17572 break;
17573
17574 ftmp = ftmp->next;
17575 }
17576
17577 /* Scan over the fixes we have identified for this pool, fixing them
17578 up and adding the constants to the pool itself. */
17579 for (this_fix = fix; this_fix && ftmp != this_fix;
17580 this_fix = this_fix->next)
17581 if (!BARRIER_P (this_fix->insn))
17582 {
17583 rtx addr
17584 = plus_constant (Pmode,
17585 gen_rtx_LABEL_REF (VOIDmode,
17586 minipool_vector_label),
17587 this_fix->minipool->offset);
17588 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17589 }
17590
17591 dump_minipool (last_barrier->insn);
17592 fix = ftmp;
17593 }
17594
17595 /* From now on we must synthesize any constants that we can't handle
17596 directly. This can happen if the RTL gets split during final
17597 instruction generation. */
17598 cfun->machine->after_arm_reorg = 1;
17599
17600 /* Free the minipool memory. */
17601 obstack_free (&minipool_obstack, minipool_startobj);
17602 }
17603 \f
17604 /* Routines to output assembly language. */
17605
17606 /* Return string representation of passed in real value. */
17607 static const char *
17608 fp_const_from_val (REAL_VALUE_TYPE *r)
17609 {
17610 if (!fp_consts_inited)
17611 init_fp_table ();
17612
17613 gcc_assert (real_equal (r, &value_fp0));
17614 return "0";
17615 }
17616
17617 /* OPERANDS[0] is the entire list of insns that constitute pop,
17618 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17619 is in the list, UPDATE is true iff the list contains explicit
17620 update of base register. */
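/* For an SP base with writeback this emits e.g. "pop {r4, r5, pc}" (with
any condition folded into the mnemonic); otherwise an ldmfd/ldmia/ldm form
naming the base register, with "!" when there is an explicit update, is
produced.  A trailing "^" is added when returning from an interrupt.  */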
17621 void
17622 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17623 bool update)
17624 {
17625 int i;
17626 char pattern[100];
17627 int offset;
17628 const char *conditional;
17629 int num_saves = XVECLEN (operands[0], 0);
17630 unsigned int regno;
17631 unsigned int regno_base = REGNO (operands[1]);
17632 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17633
17634 offset = 0;
17635 offset += update ? 1 : 0;
17636 offset += return_pc ? 1 : 0;
17637
17638 /* Is the base register in the list? */
17639 for (i = offset; i < num_saves; i++)
17640 {
17641 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17642 /* If SP is in the list, then the base register must be SP. */
17643 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17644 /* If base register is in the list, there must be no explicit update. */
17645 if (regno == regno_base)
17646 gcc_assert (!update);
17647 }
17648
17649 conditional = reverse ? "%?%D0" : "%?%d0";
17650 /* Can't use POP if returning from an interrupt. */
17651 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17652 sprintf (pattern, "pop%s\t{", conditional);
17653 else
17654 {
17655 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17656 It's just a convention; their semantics are identical.  */
17657 if (regno_base == SP_REGNUM)
17658 sprintf (pattern, "ldmfd%s\t", conditional);
17659 else if (update)
17660 sprintf (pattern, "ldmia%s\t", conditional);
17661 else
17662 sprintf (pattern, "ldm%s\t", conditional);
17663
17664 strcat (pattern, reg_names[regno_base]);
17665 if (update)
17666 strcat (pattern, "!, {");
17667 else
17668 strcat (pattern, ", {");
17669 }
17670
17671 /* Output the first destination register. */
17672 strcat (pattern,
17673 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17674
17675 /* Output the rest of the destination registers. */
17676 for (i = offset + 1; i < num_saves; i++)
17677 {
17678 strcat (pattern, ", ");
17679 strcat (pattern,
17680 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17681 }
17682
17683 strcat (pattern, "}");
17684
17685 if (interrupt_p && return_pc)
17686 strcat (pattern, "^");
17687
17688 output_asm_insn (pattern, &cond);
17689 }
17690
17691
17692 /* Output the assembly for a store multiple. */
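/* For a push of three D registers starting at d8 this prints e.g.
"vpush.64 {d8, d9, d10}" when the address register is SP, or
"vstmdb.64 rN!, {d8, d9, d10}" otherwise.  */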
17693
17694 const char *
17695 vfp_output_vstmd (rtx * operands)
17696 {
17697 char pattern[100];
17698 int p;
17699 int base;
17700 int i;
17701 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17702 ? XEXP (operands[0], 0)
17703 : XEXP (XEXP (operands[0], 0), 0);
17704 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17705
17706 if (push_p)
17707 strcpy (pattern, "vpush%?.64\t{%P1");
17708 else
17709 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17710
17711 p = strlen (pattern);
17712
17713 gcc_assert (REG_P (operands[1]));
17714
17715 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17716 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17717 {
17718 p += sprintf (&pattern[p], ", d%d", base + i);
17719 }
17720 strcpy (&pattern[p], "}");
17721
17722 output_asm_insn (pattern, operands);
17723 return "";
17724 }
17725
17726
17727 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17728 number of bytes pushed. */
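/* The push is expressed as one PARALLEL whose first element is the
UNSPEC_PUSH_MULT store with a pre-modify of SP; a separate "dwarf"
SEQUENCE spelling out the SP adjustment and each individual store is
attached as a REG_FRAME_RELATED_EXPR note so the unwind info describes
the exact frame layout.  */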
17729
17730 static int
17731 vfp_emit_fstmd (int base_reg, int count)
17732 {
17733 rtx par;
17734 rtx dwarf;
17735 rtx tmp, reg;
17736 int i;
17737
17738 /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
17739 register pairs are stored by a store multiple insn. We avoid this
17740 by pushing an extra pair. */
17741 if (count == 2 && !arm_arch6)
17742 {
17743 if (base_reg == LAST_VFP_REGNUM - 3)
17744 base_reg -= 2;
17745 count++;
17746 }
17747
17748 /* FSTMD may not store more than 16 doubleword registers at once. Split
17749 larger stores into multiple parts (up to a maximum of two, in
17750 practice). */
17751 if (count > 16)
17752 {
17753 int saved;
17754 /* NOTE: base_reg is an internal register number, so each D register
17755 counts as 2. */
17756 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17757 saved += vfp_emit_fstmd (base_reg, 16);
17758 return saved;
17759 }
17760
17761 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17762 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17763
17764 reg = gen_rtx_REG (DFmode, base_reg);
17765 base_reg += 2;
17766
17767 XVECEXP (par, 0, 0)
17768 = gen_rtx_SET (gen_frame_mem
17769 (BLKmode,
17770 gen_rtx_PRE_MODIFY (Pmode,
17771 stack_pointer_rtx,
17772 plus_constant
17773 (Pmode, stack_pointer_rtx,
17774 - (count * 8)))
17775 ),
17776 gen_rtx_UNSPEC (BLKmode,
17777 gen_rtvec (1, reg),
17778 UNSPEC_PUSH_MULT));
17779
17780 tmp = gen_rtx_SET (stack_pointer_rtx,
17781 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17782 RTX_FRAME_RELATED_P (tmp) = 1;
17783 XVECEXP (dwarf, 0, 0) = tmp;
17784
17785 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17786 RTX_FRAME_RELATED_P (tmp) = 1;
17787 XVECEXP (dwarf, 0, 1) = tmp;
17788
17789 for (i = 1; i < count; i++)
17790 {
17791 reg = gen_rtx_REG (DFmode, base_reg);
17792 base_reg += 2;
17793 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17794
17795 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17796 plus_constant (Pmode,
17797 stack_pointer_rtx,
17798 i * 8)),
17799 reg);
17800 RTX_FRAME_RELATED_P (tmp) = 1;
17801 XVECEXP (dwarf, 0, i + 1) = tmp;
17802 }
17803
17804 par = emit_insn (par);
17805 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17806 RTX_FRAME_RELATED_P (par) = 1;
17807
17808 return count * 8;
17809 }
17810
17811 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17812 has the cmse_nonsecure_call attribute and returns false otherwise. */
17813
17814 bool
17815 detect_cmse_nonsecure_call (tree addr)
17816 {
17817 if (!addr)
17818 return false;
17819
17820 tree fntype = TREE_TYPE (addr);
17821 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17822 TYPE_ATTRIBUTES (fntype)))
17823 return true;
17824 return false;
17825 }
17826
17827
17828 /* Emit a call instruction with pattern PAT. ADDR is the address of
17829 the call target. */
17830
17831 void
17832 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17833 {
17834 rtx insn;
17835
17836 insn = emit_call_insn (pat);
17837
17838 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17839 If the call might use such an entry, add a use of the PIC register
17840 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17841 if (TARGET_VXWORKS_RTP
17842 && flag_pic
17843 && !sibcall
17844 && GET_CODE (addr) == SYMBOL_REF
17845 && (SYMBOL_REF_DECL (addr)
17846 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17847 : !SYMBOL_REF_LOCAL_P (addr)))
17848 {
17849 require_pic_register ();
17850 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17851 }
17852
17853 if (TARGET_AAPCS_BASED)
17854 {
17855 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17856 linker. We need to add an IP clobber to allow setting
17857 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17858 is not needed since it's a fixed register. */
17859 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17860 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17861 }
17862 }
17863
17864 /* Output a 'call' insn. */
17865 const char *
17866 output_call (rtx *operands)
17867 {
17868 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17869
17870 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17871 if (REGNO (operands[0]) == LR_REGNUM)
17872 {
17873 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17874 output_asm_insn ("mov%?\t%0, %|lr", operands);
17875 }
17876
17877 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17878
17879 if (TARGET_INTERWORK || arm_arch4t)
17880 output_asm_insn ("bx%?\t%0", operands);
17881 else
17882 output_asm_insn ("mov%?\t%|pc, %0", operands);
17883
17884 return "";
17885 }
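
/* As an illustrative note: on such pre-ARMv5 targets an indirect call
   through, say, r2 is emitted as "mov lr, pc" followed by "bx r2" (when
   interworking or ARMv4T is in effect) or "mov pc, r2" otherwise; a call
   whose target is lr is first redirected through ip, since lr is about to
   be overwritten with the return address.  */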
17886
17887 /* Output a move from arm registers to arm registers of a long double.
17888 OPERANDS[0] is the destination.
17889 OPERANDS[1] is the source. */
17890 const char *
17891 output_mov_long_double_arm_from_arm (rtx *operands)
17892 {
17893 /* We have to be careful here because the two might overlap. */
17894 int dest_start = REGNO (operands[0]);
17895 int src_start = REGNO (operands[1]);
17896 rtx ops[2];
17897 int i;
17898
17899 if (dest_start < src_start)
17900 {
17901 for (i = 0; i < 3; i++)
17902 {
17903 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17904 ops[1] = gen_rtx_REG (SImode, src_start + i);
17905 output_asm_insn ("mov%?\t%0, %1", ops);
17906 }
17907 }
17908 else
17909 {
17910 for (i = 2; i >= 0; i--)
17911 {
17912 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17913 ops[1] = gen_rtx_REG (SImode, src_start + i);
17914 output_asm_insn ("mov%?\t%0, %1", ops);
17915 }
17916 }
17917
17918 return "";
17919 }
17920
17921 void
17922 arm_emit_movpair (rtx dest, rtx src)
17923 {
17924 /* If the src is an immediate, simplify it. */
17925 if (CONST_INT_P (src))
17926 {
17927 HOST_WIDE_INT val = INTVAL (src);
17928 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17929 if ((val >> 16) & 0x0000ffff)
17930 {
17931 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17932 GEN_INT (16)),
17933 GEN_INT ((val >> 16) & 0x0000ffff));
17934 rtx_insn *insn = get_last_insn ();
17935 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17936 }
17937 return;
17938 }
17939 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17940 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17941 rtx_insn *insn = get_last_insn ();
17942 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17943 }
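
/* As an illustrative example: for the constant 0x12345678 the code above
   first sets DEST to the low half 0x5678 and then inserts 0x1234 into
   bits 16-31, recording the full value as a REG_EQUAL note; a constant
   whose high half is zero needs only the first set.  A symbolic SRC is
   instead expressed as a HIGH/LO_SUM pair.  */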
17944
17945 /* Output a move between double words. It must be REG<-MEM
17946 or MEM<-REG. */
17947 const char *
17948 output_move_double (rtx *operands, bool emit, int *count)
17949 {
17950 enum rtx_code code0 = GET_CODE (operands[0]);
17951 enum rtx_code code1 = GET_CODE (operands[1]);
17952 rtx otherops[3];
17953 if (count)
17954 *count = 1;
17955
17956 /* The only case when this might happen is when
17957 you are looking at the length of a DImode instruction
17958 that has an invalid constant in it. */
17959 if (code0 == REG && code1 != MEM)
17960 {
17961 gcc_assert (!emit);
17962 *count = 2;
17963 return "";
17964 }
17965
17966 if (code0 == REG)
17967 {
17968 unsigned int reg0 = REGNO (operands[0]);
17969
17970 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17971
17972 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17973
17974 switch (GET_CODE (XEXP (operands[1], 0)))
17975 {
17976 case REG:
17977
17978 if (emit)
17979 {
17980 if (TARGET_LDRD
17981 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17982 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17983 else
17984 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17985 }
17986 break;
17987
17988 case PRE_INC:
17989 gcc_assert (TARGET_LDRD);
17990 if (emit)
17991 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17992 break;
17993
17994 case PRE_DEC:
17995 if (emit)
17996 {
17997 if (TARGET_LDRD)
17998 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17999 else
18000 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18001 }
18002 break;
18003
18004 case POST_INC:
18005 if (emit)
18006 {
18007 if (TARGET_LDRD)
18008 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18009 else
18010 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18011 }
18012 break;
18013
18014 case POST_DEC:
18015 gcc_assert (TARGET_LDRD);
18016 if (emit)
18017 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18018 break;
18019
18020 case PRE_MODIFY:
18021 case POST_MODIFY:
18022 /* Auto-increment addressing modes should never have overlapping
18023 base and destination registers, and overlapping index registers
18024 are already prohibited, so this doesn't need to worry about
18025 fix_cm3_ldrd. */
18026 otherops[0] = operands[0];
18027 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18028 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18029
18030 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18031 {
18032 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18033 {
18034 /* Registers overlap so split out the increment. */
18035 if (emit)
18036 {
18037 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18038 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18039 }
18040 if (count)
18041 *count = 2;
18042 }
18043 else
18044 {
18045 /* Use a single insn if we can.
18046 FIXME: IWMMXT allows offsets larger than ldrd can
18047 handle; fix these up with a pair of ldr. */
18048 if (TARGET_THUMB2
18049 || !CONST_INT_P (otherops[2])
18050 || (INTVAL (otherops[2]) > -256
18051 && INTVAL (otherops[2]) < 256))
18052 {
18053 if (emit)
18054 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18055 }
18056 else
18057 {
18058 if (emit)
18059 {
18060 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18061 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18062 }
18063 if (count)
18064 *count = 2;
18065
18066 }
18067 }
18068 }
18069 else
18070 {
18071 /* Use a single insn if we can.
18072 FIXME: IWMMXT allows offsets larger than ldrd can handle;
18073 fix these up with a pair of ldr. */
18074 if (TARGET_THUMB2
18075 || !CONST_INT_P (otherops[2])
18076 || (INTVAL (otherops[2]) > -256
18077 && INTVAL (otherops[2]) < 256))
18078 {
18079 if (emit)
18080 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18081 }
18082 else
18083 {
18084 if (emit)
18085 {
18086 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18087 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18088 }
18089 if (count)
18090 *count = 2;
18091 }
18092 }
18093 break;
18094
18095 case LABEL_REF:
18096 case CONST:
18097 /* We might be able to use ldrd %0, %1 here. However, the range is
18098 different from that of ldr/adr, and it is broken on some ARMv7-M
18099 implementations. */
18100 /* Use the second register of the pair to avoid problematic
18101 overlap. */
18102 otherops[1] = operands[1];
18103 if (emit)
18104 output_asm_insn ("adr%?\t%0, %1", otherops);
18105 operands[1] = otherops[0];
18106 if (emit)
18107 {
18108 if (TARGET_LDRD)
18109 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18110 else
18111 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18112 }
18113
18114 if (count)
18115 *count = 2;
18116 break;
18117
18118 /* ??? This needs checking for thumb2. */
18119 default:
18120 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18121 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18122 {
18123 otherops[0] = operands[0];
18124 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18125 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18126
18127 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18128 {
18129 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18130 {
18131 switch ((int) INTVAL (otherops[2]))
18132 {
18133 case -8:
18134 if (emit)
18135 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18136 return "";
18137 case -4:
18138 if (TARGET_THUMB2)
18139 break;
18140 if (emit)
18141 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18142 return "";
18143 case 4:
18144 if (TARGET_THUMB2)
18145 break;
18146 if (emit)
18147 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18148 return "";
18149 }
18150 }
18151 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18152 operands[1] = otherops[0];
18153 if (TARGET_LDRD
18154 && (REG_P (otherops[2])
18155 || TARGET_THUMB2
18156 || (CONST_INT_P (otherops[2])
18157 && INTVAL (otherops[2]) > -256
18158 && INTVAL (otherops[2]) < 256)))
18159 {
18160 if (reg_overlap_mentioned_p (operands[0],
18161 otherops[2]))
18162 {
18163 /* Swap base and index registers over to
18164 avoid a conflict. */
18165 std::swap (otherops[1], otherops[2]);
18166 }
18167 /* If both registers conflict, it will usually
18168 have been fixed by a splitter. */
18169 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18170 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18171 {
18172 if (emit)
18173 {
18174 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18175 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18176 }
18177 if (count)
18178 *count = 2;
18179 }
18180 else
18181 {
18182 otherops[0] = operands[0];
18183 if (emit)
18184 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18185 }
18186 return "";
18187 }
18188
18189 if (CONST_INT_P (otherops[2]))
18190 {
18191 if (emit)
18192 {
18193 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18194 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18195 else
18196 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18197 }
18198 }
18199 else
18200 {
18201 if (emit)
18202 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18203 }
18204 }
18205 else
18206 {
18207 if (emit)
18208 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18209 }
18210
18211 if (count)
18212 *count = 2;
18213
18214 if (TARGET_LDRD)
18215 return "ldrd%?\t%0, [%1]";
18216
18217 return "ldmia%?\t%1, %M0";
18218 }
18219 else
18220 {
18221 otherops[1] = adjust_address (operands[1], SImode, 4);
18222 /* Take care of overlapping base/data reg. */
18223 if (reg_mentioned_p (operands[0], operands[1]))
18224 {
18225 if (emit)
18226 {
18227 output_asm_insn ("ldr%?\t%0, %1", otherops);
18228 output_asm_insn ("ldr%?\t%0, %1", operands);
18229 }
18230 if (count)
18231 *count = 2;
18232
18233 }
18234 else
18235 {
18236 if (emit)
18237 {
18238 output_asm_insn ("ldr%?\t%0, %1", operands);
18239 output_asm_insn ("ldr%?\t%0, %1", otherops);
18240 }
18241 if (count)
18242 *count = 2;
18243 }
18244 }
18245 }
18246 }
18247 else
18248 {
18249 /* Constraints should ensure this. */
18250 gcc_assert (code0 == MEM && code1 == REG);
18251 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18252 || (TARGET_ARM && TARGET_LDRD));
18253
18254 switch (GET_CODE (XEXP (operands[0], 0)))
18255 {
18256 case REG:
18257 if (emit)
18258 {
18259 if (TARGET_LDRD)
18260 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18261 else
18262 output_asm_insn ("stm%?\t%m0, %M1", operands);
18263 }
18264 break;
18265
18266 case PRE_INC:
18267 gcc_assert (TARGET_LDRD);
18268 if (emit)
18269 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18270 break;
18271
18272 case PRE_DEC:
18273 if (emit)
18274 {
18275 if (TARGET_LDRD)
18276 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18277 else
18278 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18279 }
18280 break;
18281
18282 case POST_INC:
18283 if (emit)
18284 {
18285 if (TARGET_LDRD)
18286 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18287 else
18288 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18289 }
18290 break;
18291
18292 case POST_DEC:
18293 gcc_assert (TARGET_LDRD);
18294 if (emit)
18295 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18296 break;
18297
18298 case PRE_MODIFY:
18299 case POST_MODIFY:
18300 otherops[0] = operands[1];
18301 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18302 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18303
18304 /* IWMMXT allows offsets larger than ldrd can handle;
18305 fix these up with a pair of ldr. */
18306 if (!TARGET_THUMB2
18307 && CONST_INT_P (otherops[2])
18308 && (INTVAL(otherops[2]) <= -256
18309 || INTVAL(otherops[2]) >= 256))
18310 {
18311 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18312 {
18313 if (emit)
18314 {
18315 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18316 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18317 }
18318 if (count)
18319 *count = 2;
18320 }
18321 else
18322 {
18323 if (emit)
18324 {
18325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18326 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18327 }
18328 if (count)
18329 *count = 2;
18330 }
18331 }
18332 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18333 {
18334 if (emit)
18335 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18336 }
18337 else
18338 {
18339 if (emit)
18340 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18341 }
18342 break;
18343
18344 case PLUS:
18345 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18346 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18347 {
18348 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18349 {
18350 case -8:
18351 if (emit)
18352 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18353 return "";
18354
18355 case -4:
18356 if (TARGET_THUMB2)
18357 break;
18358 if (emit)
18359 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18360 return "";
18361
18362 case 4:
18363 if (TARGET_THUMB2)
18364 break;
18365 if (emit)
18366 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18367 return "";
18368 }
18369 }
18370 if (TARGET_LDRD
18371 && (REG_P (otherops[2])
18372 || TARGET_THUMB2
18373 || (CONST_INT_P (otherops[2])
18374 && INTVAL (otherops[2]) > -256
18375 && INTVAL (otherops[2]) < 256)))
18376 {
18377 otherops[0] = operands[1];
18378 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18379 if (emit)
18380 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18381 return "";
18382 }
18383 /* Fall through */
18384
18385 default:
18386 otherops[0] = adjust_address (operands[0], SImode, 4);
18387 otherops[1] = operands[1];
18388 if (emit)
18389 {
18390 output_asm_insn ("str%?\t%1, %0", operands);
18391 output_asm_insn ("str%?\t%H1, %0", otherops);
18392 }
18393 if (count)
18394 *count = 2;
18395 }
18396 }
18397
18398 return "";
18399 }
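
/* As an illustrative example of the simplest case above: loading a DImode
   value held in {r0, r1} from the address in r4 is emitted as
   "ldrd r0, [r4]" when LDRD is available and "ldmia r4, {r0, r1}"
   otherwise; the corresponding stores use "strd"/"stm".  */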
18400
18401 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18402 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18403
18404 const char *
18405 output_move_quad (rtx *operands)
18406 {
18407 if (REG_P (operands[0]))
18408 {
18409 /* Load, or reg->reg move. */
18410
18411 if (MEM_P (operands[1]))
18412 {
18413 switch (GET_CODE (XEXP (operands[1], 0)))
18414 {
18415 case REG:
18416 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18417 break;
18418
18419 case LABEL_REF:
18420 case CONST:
18421 output_asm_insn ("adr%?\t%0, %1", operands);
18422 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18423 break;
18424
18425 default:
18426 gcc_unreachable ();
18427 }
18428 }
18429 else
18430 {
18431 rtx ops[2];
18432 int dest, src, i;
18433
18434 gcc_assert (REG_P (operands[1]));
18435
18436 dest = REGNO (operands[0]);
18437 src = REGNO (operands[1]);
18438
18439 /* This seems pretty dumb, but hopefully GCC won't try to do it
18440 very often. */
18441 if (dest < src)
18442 for (i = 0; i < 4; i++)
18443 {
18444 ops[0] = gen_rtx_REG (SImode, dest + i);
18445 ops[1] = gen_rtx_REG (SImode, src + i);
18446 output_asm_insn ("mov%?\t%0, %1", ops);
18447 }
18448 else
18449 for (i = 3; i >= 0; i--)
18450 {
18451 ops[0] = gen_rtx_REG (SImode, dest + i);
18452 ops[1] = gen_rtx_REG (SImode, src + i);
18453 output_asm_insn ("mov%?\t%0, %1", ops);
18454 }
18455 }
18456 }
18457 else
18458 {
18459 gcc_assert (MEM_P (operands[0]));
18460 gcc_assert (REG_P (operands[1]));
18461 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18462
18463 switch (GET_CODE (XEXP (operands[0], 0)))
18464 {
18465 case REG:
18466 output_asm_insn ("stm%?\t%m0, %M1", operands);
18467 break;
18468
18469 default:
18470 gcc_unreachable ();
18471 }
18472 }
18473
18474 return "";
18475 }
18476
18477 /* Output a VFP load or store instruction. */
18478
18479 const char *
18480 output_move_vfp (rtx *operands)
18481 {
18482 rtx reg, mem, addr, ops[2];
18483 int load = REG_P (operands[0]);
18484 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18485 int sp = (!TARGET_VFP_FP16INST
18486 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18487 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18488 const char *templ;
18489 char buff[50];
18490 machine_mode mode;
18491
18492 reg = operands[!load];
18493 mem = operands[load];
18494
18495 mode = GET_MODE (reg);
18496
18497 gcc_assert (REG_P (reg));
18498 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18499 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18500 || mode == SFmode
18501 || mode == DFmode
18502 || mode == HImode
18503 || mode == SImode
18504 || mode == DImode
18505 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18506 gcc_assert (MEM_P (mem));
18507
18508 addr = XEXP (mem, 0);
18509
18510 switch (GET_CODE (addr))
18511 {
18512 case PRE_DEC:
18513 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18514 ops[0] = XEXP (addr, 0);
18515 ops[1] = reg;
18516 break;
18517
18518 case POST_INC:
18519 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18520 ops[0] = XEXP (addr, 0);
18521 ops[1] = reg;
18522 break;
18523
18524 default:
18525 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18526 ops[0] = reg;
18527 ops[1] = mem;
18528 break;
18529 }
18530
18531 sprintf (buff, templ,
18532 load ? "ld" : "st",
18533 dp ? "64" : sp ? "32" : "16",
18534 dp ? "P" : "",
18535 integer_p ? "\t%@ int" : "");
18536 output_asm_insn (buff, ops);
18537
18538 return "";
18539 }
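
/* Illustrative examples of the templates above: a DFmode load whose address
   is a POST_INC becomes "vldmia.64 rN!, {dM}", a plain SFmode load becomes
   "vldr.32 sM, [rN]", and for integer modes a trailing "@ int" comment is
   appended to the instruction.  */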
18540
18541 /* Output a Neon double-word or quad-word load or store, or a load
18542 or store for larger structure modes.
18543
18544 WARNING: The ordering of elements is weird in big-endian mode,
18545 because the EABI requires that vectors stored in memory appear
18546 as though they were stored by a VSTM.
18547 GCC RTL defines element ordering based on in-memory order.
18548 This can be different from the architectural ordering of elements
18549 within a NEON register. The intrinsics defined in arm_neon.h use the
18550 NEON register element ordering, not the GCC RTL element ordering.
18551
18552 For example, the in-memory ordering of a big-endian quadword
18553 vector with 16-bit elements when stored from register pair {d0,d1}
18554 will be (lowest address first, d0[N] is NEON register element N):
18555
18556 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18557
18558 When necessary, quadword registers (dN, dN+1) are moved to ARM
18559 registers from rN in the order:
18560
18561 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18562
18563 So that STM/LDM can be used on vectors in ARM registers, and the
18564 same memory layout will result as if VSTM/VLDM were used.
18565
18566 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18567 possible, which allows use of appropriate alignment tags.
18568 Note that the choice of "64" is independent of the actual vector
18569 element size; this size simply ensures that the behavior is
18570 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18571
18572 Due to limitations of those instructions, use of VST1.64/VLD1.64
18573 is not possible if:
18574 - the address contains PRE_DEC, or
18575 - the mode refers to more than 4 double-word registers
18576
18577 In those cases, it would be possible to replace VSTM/VLDM by a
18578 sequence of instructions; this is not currently implemented since
18579 this is not certain to actually improve performance. */
18580
18581 const char *
18582 output_move_neon (rtx *operands)
18583 {
18584 rtx reg, mem, addr, ops[2];
18585 int regno, nregs, load = REG_P (operands[0]);
18586 const char *templ;
18587 char buff[50];
18588 machine_mode mode;
18589
18590 reg = operands[!load];
18591 mem = operands[load];
18592
18593 mode = GET_MODE (reg);
18594
18595 gcc_assert (REG_P (reg));
18596 regno = REGNO (reg);
18597 nregs = REG_NREGS (reg) / 2;
18598 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18599 || NEON_REGNO_OK_FOR_QUAD (regno));
18600 gcc_assert (VALID_NEON_DREG_MODE (mode)
18601 || VALID_NEON_QREG_MODE (mode)
18602 || VALID_NEON_STRUCT_MODE (mode));
18603 gcc_assert (MEM_P (mem));
18604
18605 addr = XEXP (mem, 0);
18606
18607 /* Strip off const from addresses like (const (plus (...))). */
18608 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18609 addr = XEXP (addr, 0);
18610
18611 switch (GET_CODE (addr))
18612 {
18613 case POST_INC:
18614 /* We have to use vldm / vstm for too-large modes. */
18615 if (nregs > 4)
18616 {
18617 templ = "v%smia%%?\t%%0!, %%h1";
18618 ops[0] = XEXP (addr, 0);
18619 }
18620 else
18621 {
18622 templ = "v%s1.64\t%%h1, %%A0";
18623 ops[0] = mem;
18624 }
18625 ops[1] = reg;
18626 break;
18627
18628 case PRE_DEC:
18629 /* We have to use vldm / vstm in this case, since there is no
18630 pre-decrement form of the vld1 / vst1 instructions. */
18631 templ = "v%smdb%%?\t%%0!, %%h1";
18632 ops[0] = XEXP (addr, 0);
18633 ops[1] = reg;
18634 break;
18635
18636 case POST_MODIFY:
18637 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18638 gcc_unreachable ();
18639
18640 case REG:
18641 /* We have to use vldm / vstm for too-large modes. */
18642 if (nregs > 1)
18643 {
18644 if (nregs > 4)
18645 templ = "v%smia%%?\t%%m0, %%h1";
18646 else
18647 templ = "v%s1.64\t%%h1, %%A0";
18648
18649 ops[0] = mem;
18650 ops[1] = reg;
18651 break;
18652 }
18653 /* Fall through. */
18654 case LABEL_REF:
18655 case PLUS:
18656 {
18657 int i;
18658 int overlap = -1;
18659 for (i = 0; i < nregs; i++)
18660 {
18661 /* We're only using DImode here because it's a convenient size. */
18662 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18663 ops[1] = adjust_address (mem, DImode, 8 * i);
18664 if (reg_overlap_mentioned_p (ops[0], mem))
18665 {
18666 gcc_assert (overlap == -1);
18667 overlap = i;
18668 }
18669 else
18670 {
18671 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18672 output_asm_insn (buff, ops);
18673 }
18674 }
18675 if (overlap != -1)
18676 {
18677 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18678 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18679 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18680 output_asm_insn (buff, ops);
18681 }
18682
18683 return "";
18684 }
18685
18686 default:
18687 gcc_unreachable ();
18688 }
18689
18690 sprintf (buff, templ, load ? "ld" : "st");
18691 output_asm_insn (buff, ops);
18692
18693 return "";
18694 }
18695
18696 /* Compute and return the length of neon_mov<mode>, where <mode> is
18697 one of VSTRUCT modes: EI, OI, CI or XI. */
18698 int
18699 arm_attr_length_move_neon (rtx_insn *insn)
18700 {
18701 rtx reg, mem, addr;
18702 int load;
18703 machine_mode mode;
18704
18705 extract_insn_cached (insn);
18706
18707 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18708 {
18709 mode = GET_MODE (recog_data.operand[0]);
18710 switch (mode)
18711 {
18712 case E_EImode:
18713 case E_OImode:
18714 return 8;
18715 case E_CImode:
18716 return 12;
18717 case E_XImode:
18718 return 16;
18719 default:
18720 gcc_unreachable ();
18721 }
18722 }
18723
18724 load = REG_P (recog_data.operand[0]);
18725 reg = recog_data.operand[!load];
18726 mem = recog_data.operand[load];
18727
18728 gcc_assert (MEM_P (mem));
18729
18730 addr = XEXP (mem, 0);
18731
18732 /* Strip off const from addresses like (const (plus (...))). */
18733 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18734 addr = XEXP (addr, 0);
18735
18736 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18737 {
18738 int insns = REG_NREGS (reg) / 2;
18739 return insns * 4;
18740 }
18741 else
18742 return 4;
18743 }
18744
18745 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18746 return zero. */
18747
18748 int
18749 arm_address_offset_is_imm (rtx_insn *insn)
18750 {
18751 rtx mem, addr;
18752
18753 extract_insn_cached (insn);
18754
18755 if (REG_P (recog_data.operand[0]))
18756 return 0;
18757
18758 mem = recog_data.operand[0];
18759
18760 gcc_assert (MEM_P (mem));
18761
18762 addr = XEXP (mem, 0);
18763
18764 if (REG_P (addr)
18765 || (GET_CODE (addr) == PLUS
18766 && REG_P (XEXP (addr, 0))
18767 && CONST_INT_P (XEXP (addr, 1))))
18768 return 1;
18769 else
18770 return 0;
18771 }
18772
18773 /* Output an ADD r, s, #n where n may be too big for one instruction.
18774 If it is simply adding zero to the same register, output nothing. */
18775 const char *
18776 output_add_immediate (rtx *operands)
18777 {
18778 HOST_WIDE_INT n = INTVAL (operands[2]);
18779
18780 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18781 {
18782 if (n < 0)
18783 output_multi_immediate (operands,
18784 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18785 -n);
18786 else
18787 output_multi_immediate (operands,
18788 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18789 n);
18790 }
18791
18792 return "";
18793 }
18794
18795 /* Output a multiple immediate operation.
18796 OPERANDS is the vector of operands referred to in the output patterns.
18797 INSTR1 is the output pattern to use for the first constant.
18798 INSTR2 is the output pattern to use for subsequent constants.
18799 IMMED_OP is the index of the constant slot in OPERANDS.
18800 N is the constant value. */
18801 static const char *
18802 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18803 int immed_op, HOST_WIDE_INT n)
18804 {
18805 #if HOST_BITS_PER_WIDE_INT > 32
18806 n &= 0xffffffff;
18807 #endif
18808
18809 if (n == 0)
18810 {
18811 /* Quick and easy output. */
18812 operands[immed_op] = const0_rtx;
18813 output_asm_insn (instr1, operands);
18814 }
18815 else
18816 {
18817 int i;
18818 const char * instr = instr1;
18819
18820 /* Note that n is never zero here (which would give no output). */
18821 for (i = 0; i < 32; i += 2)
18822 {
18823 if (n & (3 << i))
18824 {
18825 operands[immed_op] = GEN_INT (n & (255 << i));
18826 output_asm_insn (instr, operands);
18827 instr = instr2;
18828 i += 6;
18829 }
18830 }
18831 }
18832
18833 return "";
18834 }
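
/* Illustrative sketch only -- not part of the compiler.  It performs the
   same chunking as the loop above, but collects the individual immediates
   instead of printing instructions.  The helper name is hypothetical.  For
   N == 0xff00ff00 it yields 0x0000ff00 and 0xff000000, i.e. two add/sub
   instructions.  */

static int
split_immediate_example (unsigned int n, unsigned int chunks[4])
{
  int i, count = 0;

  for (i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
	/* Take an 8-bit field starting at this even bit position.  */
	chunks[count++] = n & (255u << i);
	/* Skip the rest of the field (the loop adds the final 2).  */
	i += 6;
      }

  return count;
}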
18835
18836 /* Return the name of a shifter operation. */
18837 static const char *
18838 arm_shift_nmem (enum rtx_code code)
18839 {
18840 switch (code)
18841 {
18842 case ASHIFT:
18843 return ARM_LSL_NAME;
18844
18845 case ASHIFTRT:
18846 return "asr";
18847
18848 case LSHIFTRT:
18849 return "lsr";
18850
18851 case ROTATERT:
18852 return "ror";
18853
18854 default:
18855 abort ();
18856 }
18857 }
18858
18859 /* Return the appropriate ARM instruction for the operation code.
18860 The returned result should not be overwritten. OP is the rtx of the
18861 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18862 was shifted. */
18863 const char *
18864 arithmetic_instr (rtx op, int shift_first_arg)
18865 {
18866 switch (GET_CODE (op))
18867 {
18868 case PLUS:
18869 return "add";
18870
18871 case MINUS:
18872 return shift_first_arg ? "rsb" : "sub";
18873
18874 case IOR:
18875 return "orr";
18876
18877 case XOR:
18878 return "eor";
18879
18880 case AND:
18881 return "and";
18882
18883 case ASHIFT:
18884 case ASHIFTRT:
18885 case LSHIFTRT:
18886 case ROTATERT:
18887 return arm_shift_nmem (GET_CODE (op));
18888
18889 default:
18890 gcc_unreachable ();
18891 }
18892 }
18893
18894 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18895 for the operation code. The returned result should not be overwritten.
18896 OP is the rtx code of the shift.
18897 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18898 constant amount if the shift is by a constant. */
18899 static const char *
18900 shift_op (rtx op, HOST_WIDE_INT *amountp)
18901 {
18902 const char * mnem;
18903 enum rtx_code code = GET_CODE (op);
18904
18905 switch (code)
18906 {
18907 case ROTATE:
18908 if (!CONST_INT_P (XEXP (op, 1)))
18909 {
18910 output_operand_lossage ("invalid shift operand");
18911 return NULL;
18912 }
18913
18914 code = ROTATERT;
18915 *amountp = 32 - INTVAL (XEXP (op, 1));
18916 mnem = "ror";
18917 break;
18918
18919 case ASHIFT:
18920 case ASHIFTRT:
18921 case LSHIFTRT:
18922 case ROTATERT:
18923 mnem = arm_shift_nmem (code);
18924 if (CONST_INT_P (XEXP (op, 1)))
18925 {
18926 *amountp = INTVAL (XEXP (op, 1));
18927 }
18928 else if (REG_P (XEXP (op, 1)))
18929 {
18930 *amountp = -1;
18931 return mnem;
18932 }
18933 else
18934 {
18935 output_operand_lossage ("invalid shift operand");
18936 return NULL;
18937 }
18938 break;
18939
18940 case MULT:
18941 /* We never have to worry about the amount being other than a
18942 power of 2, since this case can never be reloaded from a reg. */
18943 if (!CONST_INT_P (XEXP (op, 1)))
18944 {
18945 output_operand_lossage ("invalid shift operand");
18946 return NULL;
18947 }
18948
18949 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18950
18951 /* Amount must be a power of two. */
18952 if (*amountp & (*amountp - 1))
18953 {
18954 output_operand_lossage ("invalid shift operand");
18955 return NULL;
18956 }
18957
18958 *amountp = exact_log2 (*amountp);
18959 gcc_assert (IN_RANGE (*amountp, 0, 31));
18960 return ARM_LSL_NAME;
18961
18962 default:
18963 output_operand_lossage ("invalid shift operand");
18964 return NULL;
18965 }
18966
18967 /* This is not 100% correct, but follows from the desire to merge
18968 multiplication by a power of 2 with the recognizer for a
18969 shift. >=32 is not a valid shift for "lsl", so we must try and
18970 output a shift that produces the correct arithmetical result.
18971 Using lsr #32 is identical except for the fact that the carry bit
18972 is not set correctly if we set the flags; but we never use the
18973 carry bit from such an operation, so we can ignore that. */
18974 if (code == ROTATERT)
18975 /* Rotate is just modulo 32. */
18976 *amountp &= 31;
18977 else if (*amountp != (*amountp & 31))
18978 {
18979 if (code == ASHIFT)
18980 mnem = "lsr";
18981 *amountp = 32;
18982 }
18983
18984 /* Shifts of 0 are no-ops. */
18985 if (*amountp == 0)
18986 return NULL;
18987
18988 return mnem;
18989 }
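
/* Illustrative summary of the cases above: a MULT by 8 is printed as an
   "lsl" with amount 3; an ASHIFT by 32 or more cannot be encoded as "lsl",
   so it is printed as "lsr" with amount 32, which still yields the correct
   (zero) arithmetic result; and a shift by 0 returns NULL so that no shift
   is printed at all.  */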
18990
18991 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18992 because /bin/as is horribly restrictive. The judgement about
18993 whether or not each character is 'printable' (and can be output as
18994 is) or not (and must be printed with an octal escape) must be made
18995 with reference to the *host* character set -- the situation is
18996 similar to that discussed in the comments above pp_c_char in
18997 c-pretty-print.c. */
18998
18999 #define MAX_ASCII_LEN 51
19000
19001 void
19002 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19003 {
19004 int i;
19005 int len_so_far = 0;
19006
19007 fputs ("\t.ascii\t\"", stream);
19008
19009 for (i = 0; i < len; i++)
19010 {
19011 int c = p[i];
19012
19013 if (len_so_far >= MAX_ASCII_LEN)
19014 {
19015 fputs ("\"\n\t.ascii\t\"", stream);
19016 len_so_far = 0;
19017 }
19018
19019 if (ISPRINT (c))
19020 {
19021 if (c == '\\' || c == '\"')
19022 {
19023 putc ('\\', stream);
19024 len_so_far++;
19025 }
19026 putc (c, stream);
19027 len_so_far++;
19028 }
19029 else
19030 {
19031 fprintf (stream, "\\%03o", c);
19032 len_so_far += 4;
19033 }
19034 }
19035
19036 fputs ("\"\n", stream);
19037 }
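
/* For example, the three bytes 'a', '"' and 0x07 are emitted as
       .ascii "a\"\007"
   and a longer string is split into several .ascii directives once
   MAX_ASCII_LEN characters have been printed on one line.
   (Illustrative example of the escaping above.)  */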
19038 \f
19039 /* Whether a register is callee saved or not. This is necessary because high
19040 registers are marked as caller saved when optimizing for size on Thumb-1
19041 targets, despite being callee saved, in order to avoid using them. */
19042 #define callee_saved_reg_p(reg) \
19043 (!call_used_regs[reg] \
19044 || (TARGET_THUMB1 && optimize_size \
19045 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19046
19047 /* Compute the register save mask for registers 0 through 12
19048 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19049
19050 static unsigned long
19051 arm_compute_save_reg0_reg12_mask (void)
19052 {
19053 unsigned long func_type = arm_current_func_type ();
19054 unsigned long save_reg_mask = 0;
19055 unsigned int reg;
19056
19057 if (IS_INTERRUPT (func_type))
19058 {
19059 unsigned int max_reg;
19060 /* Interrupt functions must not corrupt any registers,
19061 even call clobbered ones. If this is a leaf function
19062 we can just examine the registers used by the RTL, but
19063 otherwise we have to assume that whatever function is
19064 called might clobber anything, and so we have to save
19065 all the call-clobbered registers as well. */
19066 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19067 /* FIQ handlers have registers r8 - r12 banked, so
19068 we only need to check r0 - r7. Normal ISRs only
19069 bank r14 and r15, so we must check up to r12.
19070 r13 is the stack pointer, which is always preserved,
19071 so we do not need to consider it here. */
19072 max_reg = 7;
19073 else
19074 max_reg = 12;
19075
19076 for (reg = 0; reg <= max_reg; reg++)
19077 if (df_regs_ever_live_p (reg)
19078 || (! crtl->is_leaf && call_used_regs[reg]))
19079 save_reg_mask |= (1 << reg);
19080
19081 /* Also save the pic base register if necessary. */
19082 if (flag_pic
19083 && !TARGET_SINGLE_PIC_BASE
19084 && arm_pic_register != INVALID_REGNUM
19085 && crtl->uses_pic_offset_table)
19086 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19087 }
19088 else if (IS_VOLATILE(func_type))
19089 {
19090 /* For noreturn functions we historically omitted register saves
19091 altogether. However, this really messes up debugging. As a
19092 compromise save just the frame pointers. Combined with the link
19093 register saved elsewhere this should be sufficient to get
19094 a backtrace. */
19095 if (frame_pointer_needed)
19096 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19097 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19098 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19099 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19100 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19101 }
19102 else
19103 {
19104 /* In the normal case we only need to save those registers
19105 which are call saved and which are used by this function. */
19106 for (reg = 0; reg <= 11; reg++)
19107 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19108 save_reg_mask |= (1 << reg);
19109
19110 /* Handle the frame pointer as a special case. */
19111 if (frame_pointer_needed)
19112 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19113
19114 /* If we aren't loading the PIC register,
19115 don't stack it even though it may be live. */
19116 if (flag_pic
19117 && !TARGET_SINGLE_PIC_BASE
19118 && arm_pic_register != INVALID_REGNUM
19119 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19120 || crtl->uses_pic_offset_table))
19121 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19122
19123 /* The prologue will copy SP into R0, so save it. */
19124 if (IS_STACKALIGN (func_type))
19125 save_reg_mask |= 1;
19126 }
19127
19128 /* Save registers so the exception handler can modify them. */
19129 if (crtl->calls_eh_return)
19130 {
19131 unsigned int i;
19132
19133 for (i = 0; ; i++)
19134 {
19135 reg = EH_RETURN_DATA_REGNO (i);
19136 if (reg == INVALID_REGNUM)
19137 break;
19138 save_reg_mask |= 1 << reg;
19139 }
19140 }
19141
19142 return save_reg_mask;
19143 }
19144
19145 /* Return true if r3 is live at the start of the function. */
19146
19147 static bool
19148 arm_r3_live_at_start_p (void)
19149 {
19150 /* Just look at cfg info, which is still close enough to correct at this
19151 point. This gives false positives for broken functions that might use
19152 uninitialized data that happens to be allocated in r3, but who cares? */
19153 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19154 }
19155
19156 /* Compute the number of bytes used to store the static chain register on the
19157 stack, above the stack frame. We need to know this accurately to get the
19158 alignment of the rest of the stack frame correct. */
19159
19160 static int
19161 arm_compute_static_chain_stack_bytes (void)
19162 {
19163 /* See the defining assertion in arm_expand_prologue. */
19164 if (IS_NESTED (arm_current_func_type ())
19165 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19166 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19167 || flag_stack_clash_protection)
19168 && !df_regs_ever_live_p (LR_REGNUM)))
19169 && arm_r3_live_at_start_p ()
19170 && crtl->args.pretend_args_size == 0)
19171 return 4;
19172
19173 return 0;
19174 }
19175
19176 /* Compute a bit mask of which core registers need to be
19177 saved on the stack for the current function.
19178 This is used by arm_compute_frame_layout, which may add extra registers. */
19179
19180 static unsigned long
19181 arm_compute_save_core_reg_mask (void)
19182 {
19183 unsigned int save_reg_mask = 0;
19184 unsigned long func_type = arm_current_func_type ();
19185 unsigned int reg;
19186
19187 if (IS_NAKED (func_type))
19188 /* This should never really happen. */
19189 return 0;
19190
19191 /* If we are creating a stack frame, then we must save the frame pointer,
19192 IP (which will hold the old stack pointer), LR and the PC. */
19193 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19194 save_reg_mask |=
19195 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19196 | (1 << IP_REGNUM)
19197 | (1 << LR_REGNUM)
19198 | (1 << PC_REGNUM);
19199
19200 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19201
19202 /* Decide if we need to save the link register.
19203 Interrupt routines have their own banked link register,
19204 so they never need to save it.
19205 Otherwise if we do not use the link register we do not need to save
19206 it. If we are pushing other registers onto the stack however, we
19207 can save an instruction in the epilogue by pushing the link register
19208 now and then popping it back into the PC. This incurs extra memory
19209 accesses though, so we only do it when optimizing for size, and only
19210 if we know that we will not need a fancy return sequence. */
19211 if (df_regs_ever_live_p (LR_REGNUM)
19212 || (save_reg_mask
19213 && optimize_size
19214 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19215 && !crtl->tail_call_emit
19216 && !crtl->calls_eh_return))
19217 save_reg_mask |= 1 << LR_REGNUM;
19218
19219 if (cfun->machine->lr_save_eliminated)
19220 save_reg_mask &= ~ (1 << LR_REGNUM);
19221
19222 if (TARGET_REALLY_IWMMXT
19223 && ((bit_count (save_reg_mask)
19224 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19225 arm_compute_static_chain_stack_bytes())
19226 ) % 2) != 0)
19227 {
19228 /* The total number of registers that are going to be pushed
19229 onto the stack is odd. We need to ensure that the stack
19230 is 64-bit aligned before we start to save iWMMXt registers,
19231 and also before we start to create locals. (A local variable
19232 might be a double or long long which we will load/store using
19233 an iWMMXt instruction). Therefore we need to push another
19234 ARM register, so that the stack will be 64-bit aligned. We
19235 try to avoid using the arg registers (r0 - r3), as they might be
19236 used to pass values in a tail call. */
19237 for (reg = 4; reg <= 12; reg++)
19238 if ((save_reg_mask & (1 << reg)) == 0)
19239 break;
19240
19241 if (reg <= 12)
19242 save_reg_mask |= (1 << reg);
19243 else
19244 {
19245 cfun->machine->sibcall_blocked = 1;
19246 save_reg_mask |= (1 << 3);
19247 }
19248 }
19249
19250 /* We may need to push an additional register for use initializing the
19251 PIC base register. */
19252 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19253 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19254 {
19255 reg = thumb_find_work_register (1 << 4);
19256 if (!call_used_regs[reg])
19257 save_reg_mask |= (1 << reg);
19258 }
19259
19260 return save_reg_mask;
19261 }
19262
19263 /* Compute a bit mask of which core registers need to be
19264 saved on the stack for the current function. */
19265 static unsigned long
19266 thumb1_compute_save_core_reg_mask (void)
19267 {
19268 unsigned long mask;
19269 unsigned reg;
19270
19271 mask = 0;
19272 for (reg = 0; reg < 12; reg ++)
19273 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19274 mask |= 1 << reg;
19275
19276 /* Handle the frame pointer as a special case. */
19277 if (frame_pointer_needed)
19278 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19279
19280 if (flag_pic
19281 && !TARGET_SINGLE_PIC_BASE
19282 && arm_pic_register != INVALID_REGNUM
19283 && crtl->uses_pic_offset_table)
19284 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19285
19286 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19287 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19288 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19289
19290 /* LR will also be pushed if any lo regs are pushed. */
19291 if (mask & 0xff || thumb_force_lr_save ())
19292 mask |= (1 << LR_REGNUM);
19293
19294 /* Make sure we have a low work register if we need one.
19295 We will need one if we are going to push a high register,
19296 but we are not currently intending to push a low register. */
19297 if ((mask & 0xff) == 0
19298 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19299 {
19300 /* Use thumb_find_work_register to choose which register
19301 we will use. If the register is live then we will
19302 have to push it. Use LAST_LO_REGNUM as our fallback
19303 choice for the register to select. */
19304 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19305 /* Make sure the register returned by thumb_find_work_register is
19306 not part of the return value. */
19307 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19308 reg = LAST_LO_REGNUM;
19309
19310 if (callee_saved_reg_p (reg))
19311 mask |= 1 << reg;
19312 }
19313
19314 /* The 504 below is 8 bytes less than 512 because there are two possible
19315 alignment words. We can't tell here if they will be present or not, so we
19316 have to play it safe and assume that they are. */
19317 if ((CALLER_INTERWORKING_SLOT_SIZE +
19318 ROUND_UP_WORD (get_frame_size ()) +
19319 crtl->outgoing_args_size) >= 504)
19320 {
19321 /* This is the same as the code in thumb1_expand_prologue() which
19322 determines which register to use for stack decrement. */
19323 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19324 if (mask & (1 << reg))
19325 break;
19326
19327 if (reg > LAST_LO_REGNUM)
19328 {
19329 /* Make sure we have a register available for stack decrement. */
19330 mask |= 1 << LAST_LO_REGNUM;
19331 }
19332 }
19333
19334 return mask;
19335 }
19336
19337
19338 /* Return the number of bytes required to save VFP registers. */
19339 static int
19340 arm_get_vfp_saved_size (void)
19341 {
19342 unsigned int regno;
19343 int count;
19344 int saved;
19345
19346 saved = 0;
19347 /* Space for saved VFP registers. */
19348 if (TARGET_HARD_FLOAT)
19349 {
19350 count = 0;
19351 for (regno = FIRST_VFP_REGNUM;
19352 regno < LAST_VFP_REGNUM;
19353 regno += 2)
19354 {
19355 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19356 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19357 {
19358 if (count > 0)
19359 {
19360 /* Workaround ARM10 VFPr1 bug. */
19361 if (count == 2 && !arm_arch6)
19362 count++;
19363 saved += count * 8;
19364 }
19365 count = 0;
19366 }
19367 else
19368 count++;
19369 }
19370 if (count > 0)
19371 {
19372 if (count == 2 && !arm_arch6)
19373 count++;
19374 saved += count * 8;
19375 }
19376 }
19377 return saved;
19378 }
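
/* Illustrative example of the loop above: if only d8 and d9 are live and
   call-saved on a core without arm_arch6, the ARM10 VFPr1 workaround bumps
   the contiguous block from 2 to 3 registers and 24 bytes are reserved;
   with arm_arch6 the same two registers need only 16 bytes.  */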
19379
19380
19381 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19382 everything bar the final return instruction. If SIMPLE_RETURN is true,
19383 then do not output the epilogue, because it has already been emitted in RTL. */
19384 const char *
19385 output_return_instruction (rtx operand, bool really_return, bool reverse,
19386 bool simple_return)
19387 {
19388 char conditional[10];
19389 char instr[100];
19390 unsigned reg;
19391 unsigned long live_regs_mask;
19392 unsigned long func_type;
19393 arm_stack_offsets *offsets;
19394
19395 func_type = arm_current_func_type ();
19396
19397 if (IS_NAKED (func_type))
19398 return "";
19399
19400 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19401 {
19402 /* If this function was declared non-returning, and we have
19403 found a tail call, then we have to trust that the called
19404 function won't return. */
19405 if (really_return)
19406 {
19407 rtx ops[2];
19408
19409 /* Otherwise, trap an attempted return by aborting. */
19410 ops[0] = operand;
19411 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19412 : "abort");
19413 assemble_external_libcall (ops[1]);
19414 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19415 }
19416
19417 return "";
19418 }
19419
19420 gcc_assert (!cfun->calls_alloca || really_return);
19421
19422 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19423
19424 cfun->machine->return_used_this_function = 1;
19425
19426 offsets = arm_get_frame_offsets ();
19427 live_regs_mask = offsets->saved_regs_mask;
19428
19429 if (!simple_return && live_regs_mask)
19430 {
19431 const char * return_reg;
19432
19433 /* If we do not have any special requirements for function exit
19434 (e.g. interworking) then we can load the return address
19435 directly into the PC. Otherwise we must load it into LR. */
19436 if (really_return
19437 && !IS_CMSE_ENTRY (func_type)
19438 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19439 return_reg = reg_names[PC_REGNUM];
19440 else
19441 return_reg = reg_names[LR_REGNUM];
19442
19443 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19444 {
19445 /* There are three possible reasons for the IP register
19446 being saved: 1) a stack frame was created, in which case
19447 IP contains the old stack pointer; 2) an ISR routine
19448 corrupted it; or 3) it was saved to align the stack on
19449 iWMMXt. In case 1, restore IP into SP; otherwise just
19450 restore IP. */
19451 if (frame_pointer_needed)
19452 {
19453 live_regs_mask &= ~ (1 << IP_REGNUM);
19454 live_regs_mask |= (1 << SP_REGNUM);
19455 }
19456 else
19457 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19458 }
19459
19460 /* On some ARM architectures it is faster to use LDR rather than
19461 LDM to load a single register. On other architectures, the
19462 cost is the same. In 26 bit mode, or for exception handlers,
19463 we have to use LDM to load the PC so that the CPSR is also
19464 restored. */
19465 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19466 if (live_regs_mask == (1U << reg))
19467 break;
19468
19469 if (reg <= LAST_ARM_REGNUM
19470 && (reg != LR_REGNUM
19471 || ! really_return
19472 || ! IS_INTERRUPT (func_type)))
19473 {
19474 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19475 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19476 }
19477 else
19478 {
19479 char *p;
19480 int first = 1;
19481
19482 /* Generate the load multiple instruction to restore the
19483 registers. Note we can get here, even if
19484 frame_pointer_needed is true, but only if sp already
19485 points to the base of the saved core registers. */
19486 if (live_regs_mask & (1 << SP_REGNUM))
19487 {
19488 unsigned HOST_WIDE_INT stack_adjust;
19489
19490 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19491 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19492
19493 if (stack_adjust && arm_arch5 && TARGET_ARM)
19494 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19495 else
19496 {
19497 /* If we can't use ldmib (SA110 bug),
19498 then try to pop r3 instead. */
19499 if (stack_adjust)
19500 live_regs_mask |= 1 << 3;
19501
19502 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19503 }
19504 }
19505 /* For interrupt returns we have to use an LDM rather than
19506 a POP so that we can use the exception return variant. */
19507 else if (IS_INTERRUPT (func_type))
19508 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19509 else
19510 sprintf (instr, "pop%s\t{", conditional);
19511
19512 p = instr + strlen (instr);
19513
19514 for (reg = 0; reg <= SP_REGNUM; reg++)
19515 if (live_regs_mask & (1 << reg))
19516 {
19517 int l = strlen (reg_names[reg]);
19518
19519 if (first)
19520 first = 0;
19521 else
19522 {
19523 memcpy (p, ", ", 2);
19524 p += 2;
19525 }
19526
19527 memcpy (p, "%|", 2);
19528 memcpy (p + 2, reg_names[reg], l);
19529 p += l + 2;
19530 }
19531
19532 if (live_regs_mask & (1 << LR_REGNUM))
19533 {
19534 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19535 /* If returning from an interrupt, restore the CPSR. */
19536 if (IS_INTERRUPT (func_type))
19537 strcat (p, "^");
19538 }
19539 else
19540 strcpy (p, "}");
19541 }
19542
19543 output_asm_insn (instr, & operand);
19544
19545 /* See if we need to generate an extra instruction to
19546 perform the actual function return. */
19547 if (really_return
19548 && func_type != ARM_FT_INTERWORKED
19549 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19550 {
19551 /* The return has already been handled
19552 by loading the LR into the PC. */
19553 return "";
19554 }
19555 }
19556
19557 if (really_return)
19558 {
19559 switch ((int) ARM_FUNC_TYPE (func_type))
19560 {
19561 case ARM_FT_ISR:
19562 case ARM_FT_FIQ:
19563 /* ??? This is wrong for unified assembly syntax. */
19564 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19565 break;
19566
19567 case ARM_FT_INTERWORKED:
19568 gcc_assert (arm_arch5 || arm_arch4t);
19569 sprintf (instr, "bx%s\t%%|lr", conditional);
19570 break;
19571
19572 case ARM_FT_EXCEPTION:
19573 /* ??? This is wrong for unified assembly syntax. */
19574 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19575 break;
19576
19577 default:
19578 if (IS_CMSE_ENTRY (func_type))
19579 {
19580 /* Check if we have to clear the 'GE bits' which is only used if
19581 parallel add and subtraction instructions are available. */
19582 if (TARGET_INT_SIMD)
19583 snprintf (instr, sizeof (instr),
19584 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19585 else
19586 snprintf (instr, sizeof (instr),
19587 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19588
19589 output_asm_insn (instr, & operand);
19590 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19591 {
19592 /* Clear the cumulative exception-status bits (0-4,7) and the
19593 condition code bits (28-31) of the FPSCR. We need to
19594 remember to clear the first scratch register used (IP) and
19595 save and restore the second (r4). */
19596 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19597 output_asm_insn (instr, & operand);
19598 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19599 output_asm_insn (instr, & operand);
19600 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19601 output_asm_insn (instr, & operand);
19602 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19603 output_asm_insn (instr, & operand);
19604 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19605 output_asm_insn (instr, & operand);
19606 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19607 output_asm_insn (instr, & operand);
19608 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19609 output_asm_insn (instr, & operand);
19610 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19611 output_asm_insn (instr, & operand);
19612 }
19613 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19614 }
19615 /* Use bx if it's available. */
19616 else if (arm_arch5 || arm_arch4t)
19617 sprintf (instr, "bx%s\t%%|lr", conditional);
19618 else
19619 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19620 break;
19621 }
19622
19623 output_asm_insn (instr, & operand);
19624 }
19625
19626 return "";
19627 }
19628
19629 /* Output in FILE asm statements needed to declare the NAME of the function
19630 defined by its DECL node. */
19631
19632 void
19633 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19634 {
19635 size_t cmse_name_len;
19636 char *cmse_name = 0;
19637 char cmse_prefix[] = "__acle_se_";
19638
19639 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19640 extra function label for each function with the 'cmse_nonsecure_entry'
19641 attribute. This extra function label should be prepended with
19642 '__acle_se_', telling the linker that it needs to create secure gateway
19643 veneers for this function. */
19644 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19645 DECL_ATTRIBUTES (decl)))
19646 {
19647 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19648 cmse_name = XALLOCAVEC (char, cmse_name_len);
19649 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19650 targetm.asm_out.globalize_label (file, cmse_name);
19651
19652 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19653 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19654 }
19655
19656 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19657 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19658 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19659 ASM_OUTPUT_LABEL (file, name);
19660
19661 if (cmse_name)
19662 ASM_OUTPUT_LABEL (file, cmse_name);
19663
19664 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19665 }
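
/* As an illustrative example: for a cmse_nonsecure_entry function named
   "foo" this emits both a global "__acle_se_foo" label and the ordinary
   "foo" label at the same address, which is how the linker knows to create
   a secure gateway veneer for it.  */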
19666
19667 /* Write the function name into the code section, directly preceding
19668 the function prologue.
19669
19670 Code will be output similar to this:
19671 t0
19672 .ascii "arm_poke_function_name", 0
19673 .align
19674 t1
19675 .word 0xff000000 + (t1 - t0)
19676 arm_poke_function_name
19677 mov ip, sp
19678 stmfd sp!, {fp, ip, lr, pc}
19679 sub fp, ip, #4
19680
19681 When performing a stack backtrace, code can inspect the value
19682 of 'pc' stored at 'fp' + 0. If the trace function then looks
19683 at location pc - 12 and the top 8 bits are set, then we know
19684 that there is a function name embedded immediately preceding this
19685 location, and that its padded length is ((pc[-3]) & ~0xff000000).
19686
19687 We assume that pc is declared as a pointer to an unsigned long.
19688
19689 It is of no benefit to output the function name if we are assembling
19690 a leaf function. These function types will not contain a stack
19691 backtrace structure; therefore it is not possible to determine the
19692 function name. */
19693 void
19694 arm_poke_function_name (FILE *stream, const char *name)
19695 {
19696 unsigned long alignlength;
19697 unsigned long length;
19698 rtx x;
19699
19700 length = strlen (name) + 1;
19701 alignlength = ROUND_UP_WORD (length);
19702
19703 ASM_OUTPUT_ASCII (stream, name, length);
19704 ASM_OUTPUT_ALIGN (stream, 2);
19705 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19706 assemble_aligned_integer (UNITS_PER_WORD, x);
19707 }
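
/* Illustrative sketch only -- not part of the compiler.  It shows how a
   backtracer could recover the embedded name from the saved PC value,
   following the layout documented before arm_poke_function_name.  The
   helper name is hypothetical; PC is assumed to point 12 bytes past the
   marker word, as described in that comment.  */

static const char *
poked_function_name_example (const unsigned long *pc)
{
  unsigned long marker = pc[-3];	/* The word at pc - 12.  */
  unsigned long alignlength;

  if ((marker & 0xff000000) != 0xff000000)
    return NULL;			/* No name was poked here.  */

  /* The low bits hold the padded name length; the name itself immediately
     precedes the marker word.  */
  alignlength = marker & ~0xff000000;
  return (const char *) pc - 12 - alignlength;
}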
19708
19709 /* Place some comments into the assembler stream
19710 describing the current function. */
19711 static void
19712 arm_output_function_prologue (FILE *f)
19713 {
19714 unsigned long func_type;
19715
19716 /* Sanity check. */
19717 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19718
19719 func_type = arm_current_func_type ();
19720
19721 switch ((int) ARM_FUNC_TYPE (func_type))
19722 {
19723 default:
19724 case ARM_FT_NORMAL:
19725 break;
19726 case ARM_FT_INTERWORKED:
19727 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19728 break;
19729 case ARM_FT_ISR:
19730 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19731 break;
19732 case ARM_FT_FIQ:
19733 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19734 break;
19735 case ARM_FT_EXCEPTION:
19736 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19737 break;
19738 }
19739
19740 if (IS_NAKED (func_type))
19741 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19742
19743 if (IS_VOLATILE (func_type))
19744 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19745
19746 if (IS_NESTED (func_type))
19747 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19748 if (IS_STACKALIGN (func_type))
19749 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19750 if (IS_CMSE_ENTRY (func_type))
19751 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19752
19753 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19754 crtl->args.size,
19755 crtl->args.pretend_args_size,
19756 (HOST_WIDE_INT) get_frame_size ());
19757
19758 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19759 frame_pointer_needed,
19760 cfun->machine->uses_anonymous_args);
19761
19762 if (cfun->machine->lr_save_eliminated)
19763 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19764
19765 if (crtl->calls_eh_return)
19766 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19767
19768 }
19769
19770 static void
19771 arm_output_function_epilogue (FILE *)
19772 {
19773 arm_stack_offsets *offsets;
19774
19775 if (TARGET_THUMB1)
19776 {
19777 int regno;
19778
19779 /* Emit any call-via-reg trampolines that are needed for v4t support
19780 of call_reg and call_value_reg type insns. */
19781 for (regno = 0; regno < LR_REGNUM; regno++)
19782 {
19783 rtx label = cfun->machine->call_via[regno];
19784
19785 if (label != NULL)
19786 {
19787 switch_to_section (function_section (current_function_decl));
19788 targetm.asm_out.internal_label (asm_out_file, "L",
19789 CODE_LABEL_NUMBER (label));
19790 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19791 }
19792 }
19793
19794 /* ??? Probably not safe to set this here, since it assumes that a
19795 function will be emitted as assembly immediately after we generate
19796 RTL for it. This does not happen for inline functions. */
19797 cfun->machine->return_used_this_function = 0;
19798 }
19799 else /* TARGET_32BIT */
19800 {
19801 /* We need to take into account any stack-frame rounding. */
19802 offsets = arm_get_frame_offsets ();
19803
19804 gcc_assert (!use_return_insn (FALSE, NULL)
19805 || (cfun->machine->return_used_this_function != 0)
19806 || offsets->saved_regs == offsets->outgoing_args
19807 || frame_pointer_needed);
19808 }
19809 }
19810
19811 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19812 STR and STRD. If an even number of registers is being pushed, an
19813 STRD pattern is created for each register pair. If an odd number
19814 of registers is pushed, emit an initial STR followed by as many
19815 STRD instructions as are needed. This works best when the
19816 stack is initially 64-bit aligned (the normal case), since it
19817 ensures that each STRD is also 64-bit aligned. */
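/* For illustration (a sketch, not necessarily the exact code emitted):
with SAVED_REGS_MASK covering {r4, r5, r6}, and assuming SP starts
64-bit aligned, the sequence built here behaves roughly like:

str r4, [sp, #-12]! @ odd count: single STR with writeback
strd r5, r6, [sp, #4] @ remaining pair, doubleword aligned */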
19818 static void
19819 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19820 {
19821 int num_regs = 0;
19822 int i;
19823 int regno;
19824 rtx par = NULL_RTX;
19825 rtx dwarf = NULL_RTX;
19826 rtx tmp;
19827 bool first = true;
19828
19829 num_regs = bit_count (saved_regs_mask);
19830
19831 /* Must be at least one register to save, and can't save SP or PC. */
19832 gcc_assert (num_regs > 0 && num_regs <= 14);
19833 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19834 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19835
19836 /* Create sequence for DWARF info. All the frame-related data for
19837 debugging is held in this wrapper. */
19838 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19839
19840 /* Describe the stack adjustment. */
19841 tmp = gen_rtx_SET (stack_pointer_rtx,
19842 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19843 RTX_FRAME_RELATED_P (tmp) = 1;
19844 XVECEXP (dwarf, 0, 0) = tmp;
19845
19846 /* Find the first register. */
19847 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19848 ;
19849
19850 i = 0;
19851
19852 /* If there's an odd number of registers to push, start off by
19853 pushing a single register. This ensures that subsequent strd
19854 operations are dword aligned (assuming that SP was originally
19855 64-bit aligned). */
19856 if ((num_regs & 1) != 0)
19857 {
19858 rtx reg, mem, insn;
19859
19860 reg = gen_rtx_REG (SImode, regno);
19861 if (num_regs == 1)
19862 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19863 stack_pointer_rtx));
19864 else
19865 mem = gen_frame_mem (Pmode,
19866 gen_rtx_PRE_MODIFY
19867 (Pmode, stack_pointer_rtx,
19868 plus_constant (Pmode, stack_pointer_rtx,
19869 -4 * num_regs)));
19870
19871 tmp = gen_rtx_SET (mem, reg);
19872 RTX_FRAME_RELATED_P (tmp) = 1;
19873 insn = emit_insn (tmp);
19874 RTX_FRAME_RELATED_P (insn) = 1;
19875 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19876 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19877 RTX_FRAME_RELATED_P (tmp) = 1;
19878 i++;
19879 regno++;
19880 XVECEXP (dwarf, 0, i) = tmp;
19881 first = false;
19882 }
19883
19884 while (i < num_regs)
19885 if (saved_regs_mask & (1 << regno))
19886 {
19887 rtx reg1, reg2, mem1, mem2;
19888 rtx tmp0, tmp1, tmp2;
19889 int regno2;
19890
19891 /* Find the register to pair with this one. */
19892 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19893 regno2++)
19894 ;
19895
19896 reg1 = gen_rtx_REG (SImode, regno);
19897 reg2 = gen_rtx_REG (SImode, regno2);
19898
19899 if (first)
19900 {
19901 rtx insn;
19902
19903 first = false;
19904 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19905 stack_pointer_rtx,
19906 -4 * num_regs));
19907 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19908 stack_pointer_rtx,
19909 -4 * (num_regs - 1)));
19910 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19911 plus_constant (Pmode, stack_pointer_rtx,
19912 -4 * (num_regs)));
19913 tmp1 = gen_rtx_SET (mem1, reg1);
19914 tmp2 = gen_rtx_SET (mem2, reg2);
19915 RTX_FRAME_RELATED_P (tmp0) = 1;
19916 RTX_FRAME_RELATED_P (tmp1) = 1;
19917 RTX_FRAME_RELATED_P (tmp2) = 1;
19918 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19919 XVECEXP (par, 0, 0) = tmp0;
19920 XVECEXP (par, 0, 1) = tmp1;
19921 XVECEXP (par, 0, 2) = tmp2;
19922 insn = emit_insn (par);
19923 RTX_FRAME_RELATED_P (insn) = 1;
19924 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19925 }
19926 else
19927 {
19928 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19929 stack_pointer_rtx,
19930 4 * i));
19931 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19932 stack_pointer_rtx,
19933 4 * (i + 1)));
19934 tmp1 = gen_rtx_SET (mem1, reg1);
19935 tmp2 = gen_rtx_SET (mem2, reg2);
19936 RTX_FRAME_RELATED_P (tmp1) = 1;
19937 RTX_FRAME_RELATED_P (tmp2) = 1;
19938 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19939 XVECEXP (par, 0, 0) = tmp1;
19940 XVECEXP (par, 0, 1) = tmp2;
19941 emit_insn (par);
19942 }
19943
19944 /* Create unwind information. This is an approximation. */
19945 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19946 plus_constant (Pmode,
19947 stack_pointer_rtx,
19948 4 * i)),
19949 reg1);
19950 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19951 plus_constant (Pmode,
19952 stack_pointer_rtx,
19953 4 * (i + 1))),
19954 reg2);
19955
19956 RTX_FRAME_RELATED_P (tmp1) = 1;
19957 RTX_FRAME_RELATED_P (tmp2) = 1;
19958 XVECEXP (dwarf, 0, i + 1) = tmp1;
19959 XVECEXP (dwarf, 0, i + 2) = tmp2;
19960 i += 2;
19961 regno = regno2 + 1;
19962 }
19963 else
19964 regno++;
19965
19966 return;
19967 }
19968
19969 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19970 whenever possible, otherwise it emits single-word stores. The first store
19971 also allocates stack space for all saved registers, using writeback with
19972 pre-indexed addressing. All other stores use offset addressing. If no STRD
19973 can be emitted, this function emits a sequence of single-word stores,
19974 and not an STM as before, because single-word stores give the scheduler
19975 more freedom and can be turned into an STM by peephole optimizations. */
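/* For illustration (a sketch, not necessarily the exact code emitted):
with SAVED_REGS_MASK covering {r4, r5, r7} this builds roughly:

strd r4, r5, [sp, #-12]! @ first store allocates all 12 bytes
str r7, [sp, #8] @ r7 has no consecutive partner */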
19976 static void
19977 arm_emit_strd_push (unsigned long saved_regs_mask)
19978 {
19979 int num_regs = 0;
19980 int i, j, dwarf_index = 0;
19981 int offset = 0;
19982 rtx dwarf = NULL_RTX;
19983 rtx insn = NULL_RTX;
19984 rtx tmp, mem;
19985
19986 /* TODO: More efficient code could be emitted by changing the
19987 layout, e.g., first push all pairs that can use STRD to keep the
19988 stack aligned, and then push all other registers. */
19989 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19990 if (saved_regs_mask & (1 << i))
19991 num_regs++;
19992
19993 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19994 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19995 gcc_assert (num_regs > 0);
19996
19997 /* Create sequence for DWARF info. */
19998 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19999
20000 /* For dwarf info, we generate explicit stack update. */
20001 tmp = gen_rtx_SET (stack_pointer_rtx,
20002 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20003 RTX_FRAME_RELATED_P (tmp) = 1;
20004 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20005
20006 /* Save registers. */
20007 offset = - 4 * num_regs;
20008 j = 0;
20009 while (j <= LAST_ARM_REGNUM)
20010 if (saved_regs_mask & (1 << j))
20011 {
20012 if ((j % 2 == 0)
20013 && (saved_regs_mask & (1 << (j + 1))))
20014 {
20015 /* The current register and the next register form a register pair
20016 for which STRD can be generated. */
20017 if (offset < 0)
20018 {
20019 /* Allocate stack space for all saved registers. */
20020 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20021 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20022 mem = gen_frame_mem (DImode, tmp);
20023 offset = 0;
20024 }
20025 else if (offset > 0)
20026 mem = gen_frame_mem (DImode,
20027 plus_constant (Pmode,
20028 stack_pointer_rtx,
20029 offset));
20030 else
20031 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20032
20033 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20034 RTX_FRAME_RELATED_P (tmp) = 1;
20035 tmp = emit_insn (tmp);
20036
20037 /* Record the first store insn. */
20038 if (dwarf_index == 1)
20039 insn = tmp;
20040
20041 /* Generate dwarf info. */
20042 mem = gen_frame_mem (SImode,
20043 plus_constant (Pmode,
20044 stack_pointer_rtx,
20045 offset));
20046 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20047 RTX_FRAME_RELATED_P (tmp) = 1;
20048 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20049
20050 mem = gen_frame_mem (SImode,
20051 plus_constant (Pmode,
20052 stack_pointer_rtx,
20053 offset + 4));
20054 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20055 RTX_FRAME_RELATED_P (tmp) = 1;
20056 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20057
20058 offset += 8;
20059 j += 2;
20060 }
20061 else
20062 {
20063 /* Emit a single word store. */
20064 if (offset < 0)
20065 {
20066 /* Allocate stack space for all saved registers. */
20067 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20068 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20069 mem = gen_frame_mem (SImode, tmp);
20070 offset = 0;
20071 }
20072 else if (offset > 0)
20073 mem = gen_frame_mem (SImode,
20074 plus_constant (Pmode,
20075 stack_pointer_rtx,
20076 offset));
20077 else
20078 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20079
20080 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20081 RTX_FRAME_RELATED_P (tmp) = 1;
20082 tmp = emit_insn (tmp);
20083
20084 /* Record the first store insn. */
20085 if (dwarf_index == 1)
20086 insn = tmp;
20087
20088 /* Generate dwarf info. */
20089 mem = gen_frame_mem (SImode,
20090 plus_constant(Pmode,
20091 stack_pointer_rtx,
20092 offset));
20093 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20094 RTX_FRAME_RELATED_P (tmp) = 1;
20095 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20096
20097 offset += 4;
20098 j += 1;
20099 }
20100 }
20101 else
20102 j++;
20103
20104 /* Attach dwarf info to the first insn we generate. */
20105 gcc_assert (insn != NULL_RTX);
20106 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20107 RTX_FRAME_RELATED_P (insn) = 1;
20108 }
20109
20110 /* Generate and emit an insn that we will recognize as a push_multi.
20111 Unfortunately, since this insn does not reflect very well the actual
20112 semantics of the operation, we need to annotate the insn for the benefit
20113 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20114 MASK for registers that should be annotated for DWARF2 frame unwind
20115 information. */
20116 static rtx
20117 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20118 {
20119 int num_regs = 0;
20120 int num_dwarf_regs = 0;
20121 int i, j;
20122 rtx par;
20123 rtx dwarf;
20124 int dwarf_par_index;
20125 rtx tmp, reg;
20126
20127 /* We don't record the PC in the dwarf frame information. */
20128 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20129
20130 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20131 {
20132 if (mask & (1 << i))
20133 num_regs++;
20134 if (dwarf_regs_mask & (1 << i))
20135 num_dwarf_regs++;
20136 }
20137
20138 gcc_assert (num_regs && num_regs <= 16);
20139 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20140
20141 /* For the body of the insn we are going to generate an UNSPEC in
20142 parallel with several USEs. This allows the insn to be recognized
20143 by the push_multi pattern in the arm.md file.
20144
20145 The body of the insn looks something like this:
20146
20147 (parallel [
20148 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20149 (const_int:SI <num>)))
20150 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20151 (use (reg:SI XX))
20152 (use (reg:SI YY))
20153 ...
20154 ])
20155
20156 For the frame note however, we try to be more explicit and actually
20157 show each register being stored into the stack frame, plus a (single)
20158 decrement of the stack pointer. We do it this way in order to be
20159 friendly to the stack unwinding code, which only wants to see a single
20160 stack decrement per instruction. The RTL we generate for the note looks
20161 something like this:
20162
20163 (sequence [
20164 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20165 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20166 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20167 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20168 ...
20169 ])
20170
20171 FIXME: In an ideal world the PRE_MODIFY would not exist and
20172 instead we'd have a parallel expression detailing all
20173 the stores to the various memory addresses so that debug
20174 information is more up-to-date. Remember however while writing
20175 this to take care of the constraints with the push instruction.
20176
20177 Note also that this has to be taken care of for the VFP registers.
20178
20179 For more see PR43399. */
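/* As a concrete (hypothetical) example: with MASK covering {r4, r5, lr}
and DWARF_REGS_MASK equal to MASK, the insn behaves like push {r4, r5, lr}:
SP is decremented by 12 and the frame note records r4 at [sp],
r5 at [sp, #4] and lr at [sp, #8]. */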
20180
20181 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20182 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20183 dwarf_par_index = 1;
20184
20185 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20186 {
20187 if (mask & (1 << i))
20188 {
20189 reg = gen_rtx_REG (SImode, i);
20190
20191 XVECEXP (par, 0, 0)
20192 = gen_rtx_SET (gen_frame_mem
20193 (BLKmode,
20194 gen_rtx_PRE_MODIFY (Pmode,
20195 stack_pointer_rtx,
20196 plus_constant
20197 (Pmode, stack_pointer_rtx,
20198 -4 * num_regs))
20199 ),
20200 gen_rtx_UNSPEC (BLKmode,
20201 gen_rtvec (1, reg),
20202 UNSPEC_PUSH_MULT));
20203
20204 if (dwarf_regs_mask & (1 << i))
20205 {
20206 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20207 reg);
20208 RTX_FRAME_RELATED_P (tmp) = 1;
20209 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20210 }
20211
20212 break;
20213 }
20214 }
20215
20216 for (j = 1, i++; j < num_regs; i++)
20217 {
20218 if (mask & (1 << i))
20219 {
20220 reg = gen_rtx_REG (SImode, i);
20221
20222 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20223
20224 if (dwarf_regs_mask & (1 << i))
20225 {
20226 tmp
20227 = gen_rtx_SET (gen_frame_mem
20228 (SImode,
20229 plus_constant (Pmode, stack_pointer_rtx,
20230 4 * j)),
20231 reg);
20232 RTX_FRAME_RELATED_P (tmp) = 1;
20233 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20234 }
20235
20236 j++;
20237 }
20238 }
20239
20240 par = emit_insn (par);
20241
20242 tmp = gen_rtx_SET (stack_pointer_rtx,
20243 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20244 RTX_FRAME_RELATED_P (tmp) = 1;
20245 XVECEXP (dwarf, 0, 0) = tmp;
20246
20247 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20248
20249 return par;
20250 }
20251
20252 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20253 SIZE is the offset to be adjusted.
20254 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20255 static void
20256 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20257 {
20258 rtx dwarf;
20259
20260 RTX_FRAME_RELATED_P (insn) = 1;
20261 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20262 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20263 }
20264
20265 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20266 SAVED_REGS_MASK shows which registers need to be restored.
20267
20268 Unfortunately, since this insn does not reflect very well the actual
20269 semantics of the operation, we need to annotate the insn for the benefit
20270 of DWARF2 frame unwind information. */
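/* As a concrete (hypothetical) example: with SAVED_REGS_MASK covering
{r4, r5, r6, pc} this behaves like pop {r4, r5, r6, pc}: the parallel
holds the return, the SP increment of 16 and the four loads, while the
REG_CFA_RESTORE notes cover r4-r6 only, since PC is not tracked. */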
20271 static void
20272 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20273 {
20274 int num_regs = 0;
20275 int i, j;
20276 rtx par;
20277 rtx dwarf = NULL_RTX;
20278 rtx tmp, reg;
20279 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20280 int offset_adj;
20281 int emit_update;
20282
20283 offset_adj = return_in_pc ? 1 : 0;
20284 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20285 if (saved_regs_mask & (1 << i))
20286 num_regs++;
20287
20288 gcc_assert (num_regs && num_regs <= 16);
20289
20290 /* If SP is in reglist, then we don't emit SP update insn. */
20291 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20292
20293 /* The parallel needs to hold num_regs SETs
20294 and one SET for the stack update. */
20295 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20296
20297 if (return_in_pc)
20298 XVECEXP (par, 0, 0) = ret_rtx;
20299
20300 if (emit_update)
20301 {
20302 /* Increment the stack pointer, based on there being
20303 num_regs 4-byte registers to restore. */
20304 tmp = gen_rtx_SET (stack_pointer_rtx,
20305 plus_constant (Pmode,
20306 stack_pointer_rtx,
20307 4 * num_regs));
20308 RTX_FRAME_RELATED_P (tmp) = 1;
20309 XVECEXP (par, 0, offset_adj) = tmp;
20310 }
20311
20312 /* Now restore every reg, which may include PC. */
20313 for (j = 0, i = 0; j < num_regs; i++)
20314 if (saved_regs_mask & (1 << i))
20315 {
20316 reg = gen_rtx_REG (SImode, i);
20317 if ((num_regs == 1) && emit_update && !return_in_pc)
20318 {
20319 /* Emit single load with writeback. */
20320 tmp = gen_frame_mem (SImode,
20321 gen_rtx_POST_INC (Pmode,
20322 stack_pointer_rtx));
20323 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20324 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20325 return;
20326 }
20327
20328 tmp = gen_rtx_SET (reg,
20329 gen_frame_mem
20330 (SImode,
20331 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20332 RTX_FRAME_RELATED_P (tmp) = 1;
20333 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20334
20335 /* We need to maintain a sequence for DWARF info too. As dwarf info
20336 should not have PC, skip PC. */
20337 if (i != PC_REGNUM)
20338 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20339
20340 j++;
20341 }
20342
20343 if (return_in_pc)
20344 par = emit_jump_insn (par);
20345 else
20346 par = emit_insn (par);
20347
20348 REG_NOTES (par) = dwarf;
20349 if (!return_in_pc)
20350 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20351 stack_pointer_rtx, stack_pointer_rtx);
20352 }
20353
20354 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20355 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20356
20357 Unfortunately, since this insn does not reflect very well the actual
20358 semantics of the operation, we need to annotate the insn for the benefit
20359 of DWARF2 frame unwind information. */
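/* As a concrete (hypothetical) example: popping four D registers starting
at d8 with BASE_REG = sp behaves like vldm sp!, {d8-d11}: the base is
advanced by 32 and the j-th register of the group is described as
restored from BASE_REG + 8 * j. */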
20360 static void
20361 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20362 {
20363 int i, j;
20364 rtx par;
20365 rtx dwarf = NULL_RTX;
20366 rtx tmp, reg;
20367
20368 gcc_assert (num_regs && num_regs <= 32);
20369
20370 /* Workaround ARM10 VFPr1 bug. */
20371 if (num_regs == 2 && !arm_arch6)
20372 {
20373 if (first_reg == 15)
20374 first_reg--;
20375
20376 num_regs++;
20377 }
20378
20379 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20380 there could be up to 32 D-registers to restore.
20381 If there are more than 16 D-registers, make two recursive calls,
20382 each of which emits one pop_multi instruction. */
20383 if (num_regs > 16)
20384 {
20385 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20386 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20387 return;
20388 }
20389
20390 /* The parallel needs to hold num_regs SETs
20391 and one SET for the stack update. */
20392 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20393
20394 /* Increment the stack pointer, based on there being
20395 num_regs 8-byte registers to restore. */
20396 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20397 RTX_FRAME_RELATED_P (tmp) = 1;
20398 XVECEXP (par, 0, 0) = tmp;
20399
20400 /* Now show every reg that will be restored, using a SET for each. */
20401 for (j = 0, i=first_reg; j < num_regs; i += 2)
20402 {
20403 reg = gen_rtx_REG (DFmode, i);
20404
20405 tmp = gen_rtx_SET (reg,
20406 gen_frame_mem
20407 (DFmode,
20408 plus_constant (Pmode, base_reg, 8 * j)));
20409 RTX_FRAME_RELATED_P (tmp) = 1;
20410 XVECEXP (par, 0, j + 1) = tmp;
20411
20412 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20413
20414 j++;
20415 }
20416
20417 par = emit_insn (par);
20418 REG_NOTES (par) = dwarf;
20419
20420 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
20421 if (REGNO (base_reg) == IP_REGNUM)
20422 {
20423 RTX_FRAME_RELATED_P (par) = 1;
20424 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20425 }
20426 else
20427 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20428 base_reg, base_reg);
20429 }
20430
20431 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20432 an even number of registers is being popped, an LDRD pattern is created for
20433 each register pair. If an odd number of registers is popped, the last
20434 register is loaded using an LDR pattern. */
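/* For illustration (a sketch, not necessarily the exact code emitted):
with SAVED_REGS_MASK covering {r4, r5, r6, r7, pc} this builds roughly:

ldrd r4, r5, [sp] @ first pair
ldrd r6, r7, [sp, #8] @ second pair
add sp, sp, #16 @ one stack update for the pairs
ldr pc, [sp], #4 @ final load doubles as the return */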
20435 static void
20436 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20437 {
20438 int num_regs = 0;
20439 int i, j;
20440 rtx par = NULL_RTX;
20441 rtx dwarf = NULL_RTX;
20442 rtx tmp, reg, tmp1;
20443 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20444
20445 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20446 if (saved_regs_mask & (1 << i))
20447 num_regs++;
20448
20449 gcc_assert (num_regs && num_regs <= 16);
20450
20451 /* We cannot generate an LDRD for PC. Hence, reduce the count if PC is
20452 to be popped. So, if num_regs was even, it now becomes odd and we
20453 can generate a pop with PC. If num_regs was odd, it becomes even
20454 and an LDR with return can be generated for PC. */
20455 if (return_in_pc)
20456 num_regs--;
20457
20458 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20459
20460 /* Var j iterates over all the registers to gather all the registers in
20461 saved_regs_mask. Var i gives the index of a saved register in the stack
20462 frame. A PARALLEL RTX of a register pair is created here, so that the
20463 pattern for LDRD can be matched. As PC is always the last register to be
20464 popped, and we have already decremented num_regs if PC is present, we
20465 don't have to worry about PC in this loop. */
20466 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20467 if (saved_regs_mask & (1 << j))
20468 {
20469 /* Create RTX for memory load. */
20470 reg = gen_rtx_REG (SImode, j);
20471 tmp = gen_rtx_SET (reg,
20472 gen_frame_mem (SImode,
20473 plus_constant (Pmode,
20474 stack_pointer_rtx, 4 * i)));
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20476
20477 if (i % 2 == 0)
20478 {
20479 /* When saved-register index (i) is even, the RTX to be emitted is
20480 yet to be created. Hence create it first. The LDRD pattern we
20481 are generating is :
20482 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20483 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20484 where target registers need not be consecutive. */
20485 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20486 dwarf = NULL_RTX;
20487 }
20488
20489 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20490 added as 0th element and if i is odd, reg_i is added as 1st element
20491 of LDRD pattern shown above. */
20492 XVECEXP (par, 0, (i % 2)) = tmp;
20493 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20494
20495 if ((i % 2) == 1)
20496 {
20497 /* When saved-register index (i) is odd, RTXs for both the registers
20498 to be loaded are generated in above given LDRD pattern, and the
20499 pattern can be emitted now. */
20500 par = emit_insn (par);
20501 REG_NOTES (par) = dwarf;
20502 RTX_FRAME_RELATED_P (par) = 1;
20503 }
20504
20505 i++;
20506 }
20507
20508 /* If num_regs is odd and return_in_pc is false, or num_regs is even
20509 and return_in_pc is true, the last register is popped using LDR.
20510 It can be PC as well. Hence, adjust the stack first and
20511 then use LDR with post-increment. */
20512
20513 /* Increment the stack pointer, based on there being
20514 num_regs 4-byte registers to restore. */
20515 tmp = gen_rtx_SET (stack_pointer_rtx,
20516 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20517 RTX_FRAME_RELATED_P (tmp) = 1;
20518 tmp = emit_insn (tmp);
20519 if (!return_in_pc)
20520 {
20521 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20522 stack_pointer_rtx, stack_pointer_rtx);
20523 }
20524
20525 dwarf = NULL_RTX;
20526
20527 if (((num_regs % 2) == 1 && !return_in_pc)
20528 || ((num_regs % 2) == 0 && return_in_pc))
20529 {
20530 /* Scan for the single register to be popped. Skip until the saved
20531 register is found. */
20532 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20533
20534 /* Gen LDR with post increment here. */
20535 tmp1 = gen_rtx_MEM (SImode,
20536 gen_rtx_POST_INC (SImode,
20537 stack_pointer_rtx));
20538 set_mem_alias_set (tmp1, get_frame_alias_set ());
20539
20540 reg = gen_rtx_REG (SImode, j);
20541 tmp = gen_rtx_SET (reg, tmp1);
20542 RTX_FRAME_RELATED_P (tmp) = 1;
20543 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20544
20545 if (return_in_pc)
20546 {
20547 /* If return_in_pc, j must be PC_REGNUM. */
20548 gcc_assert (j == PC_REGNUM);
20549 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20550 XVECEXP (par, 0, 0) = ret_rtx;
20551 XVECEXP (par, 0, 1) = tmp;
20552 par = emit_jump_insn (par);
20553 }
20554 else
20555 {
20556 par = emit_insn (tmp);
20557 REG_NOTES (par) = dwarf;
20558 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20559 stack_pointer_rtx, stack_pointer_rtx);
20560 }
20561
20562 }
20563 else if ((num_regs % 2) == 1 && return_in_pc)
20564 {
20565 /* There are 2 registers to be popped. So, generate the pattern
20566 pop_multiple_with_stack_update_and_return to pop in PC. */
20567 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20568 }
20569
20570 return;
20571 }
20572
20573 /* LDRD in ARM mode needs consecutive registers as operands. This function
20574 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20575 offset addressing and then generates one separate stack update. This provides
20576 more scheduling freedom, compared to writeback on every load. However,
20577 if the function returns using load into PC directly
20578 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20579 before the last load. TODO: Add a peephole optimization to recognize
20580 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20581 peephole optimization to merge the load at stack-offset zero
20582 with the stack update instruction using load with writeback
20583 in post-index addressing mode. */
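/* For illustration (a sketch, not necessarily the exact code emitted):
with SAVED_REGS_MASK covering {r4, r5, r6, pc} this builds roughly:

ldrd r4, r5, [sp] @ consecutive pair
ldr r6, [sp, #8] @ no partner for r6
add sp, sp, #12 @ single stack update
ldr pc, [sp], #4 @ final load doubles as the return */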
20584 static void
20585 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20586 {
20587 int j = 0;
20588 int offset = 0;
20589 rtx par = NULL_RTX;
20590 rtx dwarf = NULL_RTX;
20591 rtx tmp, mem;
20592
20593 /* Restore saved registers. */
20594 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20595 j = 0;
20596 while (j <= LAST_ARM_REGNUM)
20597 if (saved_regs_mask & (1 << j))
20598 {
20599 if ((j % 2) == 0
20600 && (saved_regs_mask & (1 << (j + 1)))
20601 && (j + 1) != PC_REGNUM)
20602 {
20603 /* Current register and next register form register pair for which
20604 LDRD can be generated. PC is always the last register popped, and
20605 we handle it separately. */
20606 if (offset > 0)
20607 mem = gen_frame_mem (DImode,
20608 plus_constant (Pmode,
20609 stack_pointer_rtx,
20610 offset));
20611 else
20612 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20613
20614 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20615 tmp = emit_insn (tmp);
20616 RTX_FRAME_RELATED_P (tmp) = 1;
20617
20618 /* Generate dwarf info. */
20619
20620 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20621 gen_rtx_REG (SImode, j),
20622 NULL_RTX);
20623 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20624 gen_rtx_REG (SImode, j + 1),
20625 dwarf);
20626
20627 REG_NOTES (tmp) = dwarf;
20628
20629 offset += 8;
20630 j += 2;
20631 }
20632 else if (j != PC_REGNUM)
20633 {
20634 /* Emit a single word load. */
20635 if (offset > 0)
20636 mem = gen_frame_mem (SImode,
20637 plus_constant (Pmode,
20638 stack_pointer_rtx,
20639 offset));
20640 else
20641 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20642
20643 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20644 tmp = emit_insn (tmp);
20645 RTX_FRAME_RELATED_P (tmp) = 1;
20646
20647 /* Generate dwarf info. */
20648 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20649 gen_rtx_REG (SImode, j),
20650 NULL_RTX);
20651
20652 offset += 4;
20653 j += 1;
20654 }
20655 else /* j == PC_REGNUM */
20656 j++;
20657 }
20658 else
20659 j++;
20660
20661 /* Update the stack. */
20662 if (offset > 0)
20663 {
20664 tmp = gen_rtx_SET (stack_pointer_rtx,
20665 plus_constant (Pmode,
20666 stack_pointer_rtx,
20667 offset));
20668 tmp = emit_insn (tmp);
20669 arm_add_cfa_adjust_cfa_note (tmp, offset,
20670 stack_pointer_rtx, stack_pointer_rtx);
20671 offset = 0;
20672 }
20673
20674 if (saved_regs_mask & (1 << PC_REGNUM))
20675 {
20676 /* Only PC is to be popped. */
20677 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20678 XVECEXP (par, 0, 0) = ret_rtx;
20679 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20680 gen_frame_mem (SImode,
20681 gen_rtx_POST_INC (SImode,
20682 stack_pointer_rtx)));
20683 RTX_FRAME_RELATED_P (tmp) = 1;
20684 XVECEXP (par, 0, 1) = tmp;
20685 par = emit_jump_insn (par);
20686
20687 /* Generate dwarf info. */
20688 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20689 gen_rtx_REG (SImode, PC_REGNUM),
20690 NULL_RTX);
20691 REG_NOTES (par) = dwarf;
20692 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20693 stack_pointer_rtx, stack_pointer_rtx);
20694 }
20695 }
20696
20697 /* Calculate the size of the return value that is passed in registers. */
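/* For example (illustrative of typical cases only): a function returning
long long (DImode) yields 8 here, since a 64-bit value comes back in
r0 and r1 under the usual AAPCS convention, while a function returning
int yields 4. */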
20698 static unsigned
20699 arm_size_return_regs (void)
20700 {
20701 machine_mode mode;
20702
20703 if (crtl->return_rtx != 0)
20704 mode = GET_MODE (crtl->return_rtx);
20705 else
20706 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20707
20708 return GET_MODE_SIZE (mode);
20709 }
20710
20711 /* Return true if the current function needs to save/restore LR. */
20712 static bool
20713 thumb_force_lr_save (void)
20714 {
20715 return !cfun->machine->lr_save_eliminated
20716 && (!crtl->is_leaf
20717 || thumb_far_jump_used_p ()
20718 || df_regs_ever_live_p (LR_REGNUM));
20719 }
20720
20721 /* We do not know whether r3 will be available, because
20722 an indirect tail call is happening in this
20723 particular case. */
20724 static bool
20725 is_indirect_tailcall_p (rtx call)
20726 {
20727 rtx pat = PATTERN (call);
20728
20729 /* Indirect tail call. */
20730 pat = XVECEXP (pat, 0, 0);
20731 if (GET_CODE (pat) == SET)
20732 pat = SET_SRC (pat);
20733
20734 pat = XEXP (XEXP (pat, 0), 0);
20735 return REG_P (pat);
20736 }
20737
20738 /* Return true if r3 is used by any of the tail call insns in the
20739 current function. */
20740 static bool
20741 any_sibcall_could_use_r3 (void)
20742 {
20743 edge_iterator ei;
20744 edge e;
20745
20746 if (!crtl->tail_call_emit)
20747 return false;
20748 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20749 if (e->flags & EDGE_SIBCALL)
20750 {
20751 rtx_insn *call = BB_END (e->src);
20752 if (!CALL_P (call))
20753 call = prev_nonnote_nondebug_insn (call);
20754 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20755 if (find_regno_fusage (call, USE, 3)
20756 || is_indirect_tailcall_p (call))
20757 return true;
20758 }
20759 return false;
20760 }
20761
20762
20763 /* Compute the distance from register FROM to register TO.
20764 These can be the arg pointer (26), the soft frame pointer (25),
20765 the stack pointer (13) or the hard frame pointer (11).
20766 In thumb mode r7 is used as the soft frame pointer, if needed.
20767 Typical stack layout looks like this:
20768
20769     old stack pointer -> |    |
20770                           ----
20771                          |    | \
20772                          |    |   saved arguments for
20773                          |    |   vararg functions
20774                          |    | /
20775                            --
20776 hard FP & arg pointer -> |    | \
20777                          |    |   stack
20778                          |    |   frame
20779                          |    | /
20780                            --
20781                          |    | \
20782                          |    |   call saved
20783                          |    |   registers
20784    soft frame pointer -> |    | /
20785                            --
20786                          |    | \
20787                          |    |   local
20788                          |    |   variables
20789   locals base pointer -> |    | /
20790                            --
20791                          |    | \
20792                          |    |   outgoing
20793                          |    |   arguments
20794 current stack pointer -> |    | /
20795                            --
20796
20797 For a given function some or all of these stack components
20798 may not be needed, giving rise to the possibility of
20799 eliminating some of the registers.
20800
20801 The values returned by this function must reflect the behavior
20802 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20803
20804 The sign of the number returned reflects the direction of stack
20805 growth, so the values are positive for all eliminations except
20806 from the soft frame pointer to the hard frame pointer.
20807
20808 SFP may point just inside the local variables block to ensure correct
20809 alignment. */
20810
20811
20812 /* Return cached stack offsets. */
20813
20814 static arm_stack_offsets *
20815 arm_get_frame_offsets (void)
20816 {
20817 struct arm_stack_offsets *offsets;
20818
20819 offsets = &cfun->machine->stack_offsets;
20820
20821 return offsets;
20822 }
20823
20824
20825 /* Calculate stack offsets. These are used to calculate register elimination
20826 offsets and in prologue/epilogue code. Also calculates which registers
20827 should be saved. */
20828
20829 static void
20830 arm_compute_frame_layout (void)
20831 {
20832 struct arm_stack_offsets *offsets;
20833 unsigned long func_type;
20834 int saved;
20835 int core_saved;
20836 HOST_WIDE_INT frame_size;
20837 int i;
20838
20839 offsets = &cfun->machine->stack_offsets;
20840
20841 /* Initially this is the size of the local variables. It will be translated
20842 into an offset once we have determined the size of preceding data. */
20843 frame_size = ROUND_UP_WORD (get_frame_size ());
20844
20845 /* Space for variadic functions. */
20846 offsets->saved_args = crtl->args.pretend_args_size;
20847
20848 /* In Thumb mode this is incorrect, but never used. */
20849 offsets->frame
20850 = (offsets->saved_args
20851 + arm_compute_static_chain_stack_bytes ()
20852 + (frame_pointer_needed ? 4 : 0));
20853
20854 if (TARGET_32BIT)
20855 {
20856 unsigned int regno;
20857
20858 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20859 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20860 saved = core_saved;
20861
20862 /* We know that SP will be doubleword aligned on entry, and we must
20863 preserve that condition at any subroutine call. We also require the
20864 soft frame pointer to be doubleword aligned. */
20865
20866 if (TARGET_REALLY_IWMMXT)
20867 {
20868 /* Check for the call-saved iWMMXt registers. */
20869 for (regno = FIRST_IWMMXT_REGNUM;
20870 regno <= LAST_IWMMXT_REGNUM;
20871 regno++)
20872 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20873 saved += 8;
20874 }
20875
20876 func_type = arm_current_func_type ();
20877 /* Space for saved VFP registers. */
20878 if (! IS_VOLATILE (func_type)
20879 && TARGET_HARD_FLOAT)
20880 saved += arm_get_vfp_saved_size ();
20881 }
20882 else /* TARGET_THUMB1 */
20883 {
20884 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20885 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20886 saved = core_saved;
20887 if (TARGET_BACKTRACE)
20888 saved += 16;
20889 }
20890
20891 /* Saved registers include the stack frame. */
20892 offsets->saved_regs
20893 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20894 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20895
20896 /* A leaf function does not need any stack alignment if it has nothing
20897 on the stack. */
20898 if (crtl->is_leaf && frame_size == 0
20899 /* However if it calls alloca(), we have a dynamically allocated
20900 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20901 && ! cfun->calls_alloca)
20902 {
20903 offsets->outgoing_args = offsets->soft_frame;
20904 offsets->locals_base = offsets->soft_frame;
20905 return;
20906 }
20907
20908 /* Ensure SFP has the correct alignment. */
20909 if (ARM_DOUBLEWORD_ALIGN
20910 && (offsets->soft_frame & 7))
20911 {
20912 offsets->soft_frame += 4;
20913 /* Try to align stack by pushing an extra reg. Don't bother doing this
20914 when there is a stack frame as the alignment will be rolled into
20915 the normal stack adjustment. */
20916 if (frame_size + crtl->outgoing_args_size == 0)
20917 {
20918 int reg = -1;
20919
20920 /* Register r3 is caller-saved. Normally it does not need to be
20921 saved on entry by the prologue. However if we choose to save
20922 it for padding then we may confuse the compiler into thinking
20923 a prologue sequence is required when in fact it is not. This
20924 will occur when shrink-wrapping if r3 is used as a scratch
20925 register and there are no other callee-saved writes.
20926
20927 This situation can be avoided when other callee-saved registers
20928 are available: r3 is not mandatory if we can choose a callee-saved
20929 register for the padding instead. */
20930 bool prefer_callee_reg_p = false;
20931
20932 /* If it is safe to use r3, then do so. This sometimes
20933 generates better code on Thumb-2 by avoiding the need to
20934 use 32-bit push/pop instructions. */
20935 if (! any_sibcall_could_use_r3 ()
20936 && arm_size_return_regs () <= 12
20937 && (offsets->saved_regs_mask & (1 << 3)) == 0
20938 && (TARGET_THUMB2
20939 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20940 {
20941 reg = 3;
20942 if (!TARGET_THUMB2)
20943 prefer_callee_reg_p = true;
20944 }
20945 if (reg == -1
20946 || prefer_callee_reg_p)
20947 {
20948 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20949 {
20950 /* Avoid fixed registers; they may be changed at
20951 arbitrary times so it's unsafe to restore them
20952 during the epilogue. */
20953 if (!fixed_regs[i]
20954 && (offsets->saved_regs_mask & (1 << i)) == 0)
20955 {
20956 reg = i;
20957 break;
20958 }
20959 }
20960 }
20961
20962 if (reg != -1)
20963 {
20964 offsets->saved_regs += 4;
20965 offsets->saved_regs_mask |= (1 << reg);
20966 }
20967 }
20968 }
20969
20970 offsets->locals_base = offsets->soft_frame + frame_size;
20971 offsets->outgoing_args = (offsets->locals_base
20972 + crtl->outgoing_args_size);
20973
20974 if (ARM_DOUBLEWORD_ALIGN)
20975 {
20976 /* Ensure SP remains doubleword aligned. */
20977 if (offsets->outgoing_args & 7)
20978 offsets->outgoing_args += 4;
20979 gcc_assert (!(offsets->outgoing_args & 7));
20980 }
20981 }
20982
20983
20984 /* Calculate the relative offsets for the different stack pointers. Positive
20985 offsets are in the direction of stack growth. */
20986
20987 HOST_WIDE_INT
20988 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20989 {
20990 arm_stack_offsets *offsets;
20991
20992 offsets = arm_get_frame_offsets ();
20993
20994 /* OK, now we have enough information to compute the distances.
20995 There must be an entry in these switch tables for each pair
20996 of registers in ELIMINABLE_REGS, even if some of the entries
20997 seem to be redundant or useless. */
20998 switch (from)
20999 {
21000 case ARG_POINTER_REGNUM:
21001 switch (to)
21002 {
21003 case THUMB_HARD_FRAME_POINTER_REGNUM:
21004 return 0;
21005
21006 case FRAME_POINTER_REGNUM:
21007 /* This is the reverse of the soft frame pointer
21008 to hard frame pointer elimination below. */
21009 return offsets->soft_frame - offsets->saved_args;
21010
21011 case ARM_HARD_FRAME_POINTER_REGNUM:
21012 /* This is only non-zero in the case where the static chain register
21013 is stored above the frame. */
21014 return offsets->frame - offsets->saved_args - 4;
21015
21016 case STACK_POINTER_REGNUM:
21017 /* If nothing has been pushed on the stack at all
21018 then this will return -4. This *is* correct! */
21019 return offsets->outgoing_args - (offsets->saved_args + 4);
21020
21021 default:
21022 gcc_unreachable ();
21023 }
21024 gcc_unreachable ();
21025
21026 case FRAME_POINTER_REGNUM:
21027 switch (to)
21028 {
21029 case THUMB_HARD_FRAME_POINTER_REGNUM:
21030 return 0;
21031
21032 case ARM_HARD_FRAME_POINTER_REGNUM:
21033 /* The hard frame pointer points to the top entry in the
21034 stack frame. The soft frame pointer points to the bottom entry
21035 in the stack frame. If there is no stack frame at all,
21036 then they are identical. */
21037
21038 return offsets->frame - offsets->soft_frame;
21039
21040 case STACK_POINTER_REGNUM:
21041 return offsets->outgoing_args - offsets->soft_frame;
21042
21043 default:
21044 gcc_unreachable ();
21045 }
21046 gcc_unreachable ();
21047
21048 default:
21049 /* You cannot eliminate from the stack pointer.
21050 In theory you could eliminate from the hard frame
21051 pointer to the stack pointer, but this will never
21052 happen, since if a stack frame is not needed the
21053 hard frame pointer will never be used. */
21054 gcc_unreachable ();
21055 }
21056 }
21057
21058 /* Given FROM and TO register numbers, say whether this elimination is
21059 allowed. Frame pointer elimination is automatically handled.
21060
21061 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21062 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21063 pointer, we must eliminate FRAME_POINTER_REGNUM into
21064 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21065 ARG_POINTER_REGNUM. */
21066
21067 bool
21068 arm_can_eliminate (const int from, const int to)
21069 {
21070 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21071 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21072 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21073 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21074 true);
21075 }
21076
21077 /* Emit RTL to save coprocessor registers on function entry. Returns the
21078 number of bytes pushed. */
21079
21080 static int
21081 arm_save_coproc_regs(void)
21082 {
21083 int saved_size = 0;
21084 unsigned reg;
21085 unsigned start_reg;
21086 rtx insn;
21087
21088 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21089 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21090 {
21091 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21092 insn = gen_rtx_MEM (V2SImode, insn);
21093 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21094 RTX_FRAME_RELATED_P (insn) = 1;
21095 saved_size += 8;
21096 }
21097
21098 if (TARGET_HARD_FLOAT)
21099 {
21100 start_reg = FIRST_VFP_REGNUM;
21101
21102 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21103 {
21104 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21105 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21106 {
21107 if (start_reg != reg)
21108 saved_size += vfp_emit_fstmd (start_reg,
21109 (reg - start_reg) / 2);
21110 start_reg = reg + 2;
21111 }
21112 }
21113 if (start_reg != reg)
21114 saved_size += vfp_emit_fstmd (start_reg,
21115 (reg - start_reg) / 2);
21116 }
21117 return saved_size;
21118 }
21119
21120
21121 /* Set the Thumb frame pointer from the stack pointer. */
21122
21123 static void
21124 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21125 {
21126 HOST_WIDE_INT amount;
21127 rtx insn, dwarf;
21128
21129 amount = offsets->outgoing_args - offsets->locals_base;
21130 if (amount < 1024)
21131 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21132 stack_pointer_rtx, GEN_INT (amount)));
21133 else
21134 {
21135 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21136 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21137 expects the first two operands to be the same. */
21138 if (TARGET_THUMB2)
21139 {
21140 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21141 stack_pointer_rtx,
21142 hard_frame_pointer_rtx));
21143 }
21144 else
21145 {
21146 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21147 hard_frame_pointer_rtx,
21148 stack_pointer_rtx));
21149 }
21150 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21151 plus_constant (Pmode, stack_pointer_rtx, amount));
21152 RTX_FRAME_RELATED_P (dwarf) = 1;
21153 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21154 }
21155
21156 RTX_FRAME_RELATED_P (insn) = 1;
21157 }
21158
21159 struct scratch_reg {
21160 rtx reg;
21161 bool saved;
21162 };
21163
21164 /* Return a short-lived scratch register for use as a 2nd scratch register on
21165 function entry after the registers are saved in the prologue. This register
21166 must be released by means of release_scratch_register_on_entry. IP is not
21167 considered since it is always used as the 1st scratch register if available.
21168
21169 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21170 mask of live registers. */
21171
21172 static void
21173 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21174 unsigned long live_regs)
21175 {
21176 int regno = -1;
21177
21178 sr->saved = false;
21179
21180 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21181 regno = LR_REGNUM;
21182 else
21183 {
21184 unsigned int i;
21185
21186 for (i = 4; i < 11; i++)
21187 if (regno1 != i && (live_regs & (1 << i)) != 0)
21188 {
21189 regno = i;
21190 break;
21191 }
21192
21193 if (regno < 0)
21194 {
21195 /* If IP is used as the 1st scratch register for a nested function,
21196 then either r3 wasn't available or is used to preserve IP. */
21197 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21198 regno1 = 3;
21199 regno = (regno1 == 3 ? 2 : 3);
21200 sr->saved
21201 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21202 regno);
21203 }
21204 }
21205
21206 sr->reg = gen_rtx_REG (SImode, regno);
21207 if (sr->saved)
21208 {
21209 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21210 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21211 rtx x = gen_rtx_SET (stack_pointer_rtx,
21212 plus_constant (Pmode, stack_pointer_rtx, -4));
21213 RTX_FRAME_RELATED_P (insn) = 1;
21214 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21215 }
21216 }
21217
21218 /* Release a scratch register obtained from the preceding function. */
21219
21220 static void
21221 release_scratch_register_on_entry (struct scratch_reg *sr)
21222 {
21223 if (sr->saved)
21224 {
21225 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21226 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21227 rtx x = gen_rtx_SET (stack_pointer_rtx,
21228 plus_constant (Pmode, stack_pointer_rtx, 4));
21229 RTX_FRAME_RELATED_P (insn) = 1;
21230 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21231 }
21232 }
21233
21234 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21235
21236 #if PROBE_INTERVAL > 4096
21237 #error Cannot use indexed addressing mode for stack probing
21238 #endif
21239
21240 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21241 inclusive. These are offsets from the current stack pointer. REGNO1
21242 is the index number of the 1st scratch register and LIVE_REGS is the
21243 mask of live registers. */
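/* A worked example, assuming PROBE_INTERVAL is 4096: with FIRST = 4096
and SIZE = 10000 the middle branch below is taken and probes land at
SP - 8192, SP - 12288 and finally SP - 14096 (= SP - FIRST - SIZE). */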
21244
21245 static void
21246 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21247 unsigned int regno1, unsigned long live_regs)
21248 {
21249 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21250
21251 /* See if we have a constant small number of probes to generate. If so,
21252 that's the easy case. */
21253 if (size <= PROBE_INTERVAL)
21254 {
21255 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21256 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21257 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21258 }
21259
21260 /* The run-time loop is made up of 10 insns in the generic case while the
21261 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21262 else if (size <= 5 * PROBE_INTERVAL)
21263 {
21264 HOST_WIDE_INT i, rem;
21265
21266 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21267 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21268 emit_stack_probe (reg1);
21269
21270 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21271 it exceeds SIZE. If only two probes are needed, this will not
21272 generate any code. Then probe at FIRST + SIZE. */
21273 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21274 {
21275 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21276 emit_stack_probe (reg1);
21277 }
21278
21279 rem = size - (i - PROBE_INTERVAL);
21280 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21281 {
21282 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21283 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21284 }
21285 else
21286 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21287 }
21288
21289 /* Otherwise, do the same as above, but in a loop. Note that we must be
21290 extra careful with variables wrapping around because we might be at
21291 the very top (or the very bottom) of the address space and we have
21292 to be able to handle this case properly; in particular, we use an
21293 equality test for the loop condition. */
21294 else
21295 {
21296 HOST_WIDE_INT rounded_size;
21297 struct scratch_reg sr;
21298
21299 get_scratch_register_on_entry (&sr, regno1, live_regs);
21300
21301 emit_move_insn (reg1, GEN_INT (first));
21302
21303
21304 /* Step 1: round SIZE to the previous multiple of the interval. */
21305
21306 rounded_size = size & -PROBE_INTERVAL;
21307 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21308
21309
21310 /* Step 2: compute initial and final value of the loop counter. */
21311
21312 /* TEST_ADDR = SP + FIRST. */
21313 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21314
21315 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21316 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21317
21318
21319 /* Step 3: the loop
21320
21321 do
21322 {
21323 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21324 probe at TEST_ADDR
21325 }
21326 while (TEST_ADDR != LAST_ADDR)
21327
21328 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21329 until it is equal to ROUNDED_SIZE. */
21330
21331 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21332
21333
21334 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21335 that SIZE is equal to ROUNDED_SIZE. */
21336
21337 if (size != rounded_size)
21338 {
21339 HOST_WIDE_INT rem = size - rounded_size;
21340
21341 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21342 {
21343 emit_set_insn (sr.reg,
21344 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21345 emit_stack_probe (plus_constant (Pmode, sr.reg,
21346 PROBE_INTERVAL - rem));
21347 }
21348 else
21349 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21350 }
21351
21352 release_scratch_register_on_entry (&sr);
21353 }
21354
21355 /* Make sure nothing is scheduled before we are done. */
21356 emit_insn (gen_blockage ());
21357 }
21358
21359 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21360 absolute addresses. */
21361
21362 const char *
21363 output_probe_stack_range (rtx reg1, rtx reg2)
21364 {
21365 static int labelno = 0;
21366 char loop_lab[32];
21367 rtx xops[2];
21368
21369 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21370
21371 /* Loop. */
21372 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21373
21374 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21375 xops[0] = reg1;
21376 xops[1] = GEN_INT (PROBE_INTERVAL);
21377 output_asm_insn ("sub\t%0, %0, %1", xops);
21378
21379 /* Probe at TEST_ADDR. */
21380 output_asm_insn ("str\tr0, [%0, #0]", xops);
21381
21382 /* Test if TEST_ADDR == LAST_ADDR. */
21383 xops[1] = reg2;
21384 output_asm_insn ("cmp\t%0, %1", xops);
21385
21386 /* Branch. */
21387 fputs ("\tbne\t", asm_out_file);
21388 assemble_name_raw (asm_out_file, loop_lab);
21389 fputc ('\n', asm_out_file);
21390
21391 return "";
21392 }
21393
21394 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21395 function. */
21396 void
21397 arm_expand_prologue (void)
21398 {
21399 rtx amount;
21400 rtx insn;
21401 rtx ip_rtx;
21402 unsigned long live_regs_mask;
21403 unsigned long func_type;
21404 int fp_offset = 0;
21405 int saved_pretend_args = 0;
21406 int saved_regs = 0;
21407 unsigned HOST_WIDE_INT args_to_push;
21408 HOST_WIDE_INT size;
21409 arm_stack_offsets *offsets;
21410 bool clobber_ip;
21411
21412 func_type = arm_current_func_type ();
21413
21414 /* Naked functions don't have prologues. */
21415 if (IS_NAKED (func_type))
21416 {
21417 if (flag_stack_usage_info)
21418 current_function_static_stack_size = 0;
21419 return;
21420 }
21421
21422 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21423 args_to_push = crtl->args.pretend_args_size;
21424
21425 /* Compute which registers we will have to save onto the stack. */
21426 offsets = arm_get_frame_offsets ();
21427 live_regs_mask = offsets->saved_regs_mask;
21428
21429 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21430
21431 if (IS_STACKALIGN (func_type))
21432 {
21433 rtx r0, r1;
21434
21435 /* Handle a word-aligned stack pointer. We generate the following:
21436
21437 mov r0, sp
21438 bic r1, r0, #7
21439 mov sp, r1
21440 <save and restore r0 in normal prologue/epilogue>
21441 mov sp, r0
21442 bx lr
21443
21444 The unwinder doesn't need to know about the stack realignment.
21445 Just tell it we saved SP in r0. */
21446 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21447
21448 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21449 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21450
21451 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21452 RTX_FRAME_RELATED_P (insn) = 1;
21453 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21454
21455 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21456
21457 /* ??? The CFA changes here, which may cause GDB to conclude that it
21458 has entered a different function. That said, the unwind info is
21459 correct, individually, before and after this instruction because
21460 we've described the save of SP, which will override the default
21461 handling of SP as restoring from the CFA. */
21462 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21463 }
21464
21465 /* The static chain register is the same as the IP register. If it is
21466 clobbered when creating the frame, we need to save and restore it. */
21467 clobber_ip = IS_NESTED (func_type)
21468 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21469 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21470 || flag_stack_clash_protection)
21471 && !df_regs_ever_live_p (LR_REGNUM)
21472 && arm_r3_live_at_start_p ()));
21473
21474 /* Find somewhere to store IP whilst the frame is being created.
21475 We try the following places in order:
21476
21477 1. The last argument register r3 if it is available.
21478 2. A slot on the stack above the frame if there are no
21479 arguments to push onto the stack.
21480 3. Register r3 again, after pushing the argument registers
21481 onto the stack, if this is a varargs function.
21482 4. The last slot on the stack created for the arguments to
21483 push, if this isn't a varargs function.
21484
21485 Note - we only need to tell the dwarf2 backend about the SP
21486 adjustment in the second variant; the static chain register
21487 doesn't need to be unwound, as it doesn't contain a value
21488 inherited from the caller. */
21489 if (clobber_ip)
21490 {
21491 if (!arm_r3_live_at_start_p ())
21492 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21493 else if (args_to_push == 0)
21494 {
21495 rtx addr, dwarf;
21496
21497 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21498 saved_regs += 4;
21499
21500 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21501 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21502 fp_offset = 4;
21503
21504 /* Just tell the dwarf backend that we adjusted SP. */
21505 dwarf = gen_rtx_SET (stack_pointer_rtx,
21506 plus_constant (Pmode, stack_pointer_rtx,
21507 -fp_offset));
21508 RTX_FRAME_RELATED_P (insn) = 1;
21509 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21510 }
21511 else
21512 {
21513 /* Store the args on the stack. */
21514 if (cfun->machine->uses_anonymous_args)
21515 {
21516 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21517 (0xf0 >> (args_to_push / 4)) & 0xf);
21518 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21519 saved_pretend_args = 1;
21520 }
21521 else
21522 {
21523 rtx addr, dwarf;
21524
21525 if (args_to_push == 4)
21526 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21527 else
21528 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21529 plus_constant (Pmode,
21530 stack_pointer_rtx,
21531 -args_to_push));
21532
21533 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21534
21535 /* Just tell the dwarf backend that we adjusted SP. */
21536 dwarf = gen_rtx_SET (stack_pointer_rtx,
21537 plus_constant (Pmode, stack_pointer_rtx,
21538 -args_to_push));
21539 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21540 }
21541
21542 RTX_FRAME_RELATED_P (insn) = 1;
21543 fp_offset = args_to_push;
21544 args_to_push = 0;
21545 }
21546 }
21547
21548 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21549 {
21550 if (IS_INTERRUPT (func_type))
21551 {
21552 /* Interrupt functions must not corrupt any registers.
21553 Creating a frame pointer however, corrupts the IP
21554 register, so we must push it first. */
21555 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21556
21557 /* Do not set RTX_FRAME_RELATED_P on this insn.
21558 The dwarf stack unwinding code only wants to see one
21559 stack decrement per function, and this is not it. If
21560 this instruction is labeled as being part of the frame
21561 creation sequence then dwarf2out_frame_debug_expr will
21562 die when it encounters the assignment of IP to FP
21563 later on, since the use of SP here establishes SP as
21564 the CFA register and not IP.
21565
21566 Anyway this instruction is not really part of the stack
21567 frame creation although it is part of the prologue. */
21568 }
21569
21570 insn = emit_set_insn (ip_rtx,
21571 plus_constant (Pmode, stack_pointer_rtx,
21572 fp_offset));
21573 RTX_FRAME_RELATED_P (insn) = 1;
21574 }
21575
21576 if (args_to_push)
21577 {
21578 /* Push the argument registers, or reserve space for them. */
21579 if (cfun->machine->uses_anonymous_args)
21580 insn = emit_multi_reg_push
21581 ((0xf0 >> (args_to_push / 4)) & 0xf,
21582 (0xf0 >> (args_to_push / 4)) & 0xf);
21583 else
21584 insn = emit_insn
21585 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21586 GEN_INT (- args_to_push)));
21587 RTX_FRAME_RELATED_P (insn) = 1;
21588 }
21589
21590 /* If this is an interrupt service routine, and the link register
21591 is going to be pushed, and we're not generating the extra
21592 push of IP (needed when a frame is needed and the frame layout is APCS),
21593 then subtracting four from LR now will mean that the function return
21594 can be done with a single instruction. */
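  /* For example (illustrative): with LR already biased by -4 before it is
     pushed, the saved value can later be popped straight into the PC
     (e.g. "ldmfd sp!, {..., pc}^"), instead of reloading LR and then
     returning with a separate "subs pc, lr, #4".  */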
21595 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21596 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21597 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21598 && TARGET_ARM)
21599 {
21600 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21601
21602 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21603 }
21604
21605 if (live_regs_mask)
21606 {
21607 unsigned long dwarf_regs_mask = live_regs_mask;
21608
21609 saved_regs += bit_count (live_regs_mask) * 4;
21610 if (optimize_size && !frame_pointer_needed
21611 && saved_regs == offsets->saved_regs - offsets->saved_args)
21612 {
21613 /* If no coprocessor registers are being pushed and we don't have
21614 to worry about a frame pointer then push extra registers to
21615 create the stack frame. This is done in a way that does not
21616 alter the frame layout, so is independent of the epilogue. */
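   /* Illustrative example: if the frame needs 8 more bytes and r0-r3 are
      not otherwise saved, adding r0 and r1 to the existing push creates
      that space without a separate "sub sp, sp, #8".  */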
21617 int n;
21618 int frame;
21619 n = 0;
21620 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21621 n++;
21622 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21623 if (frame && n * 4 >= frame)
21624 {
21625 n = frame / 4;
21626 live_regs_mask |= (1 << n) - 1;
21627 saved_regs += frame;
21628 }
21629 }
21630
21631 if (TARGET_LDRD
21632 && current_tune->prefer_ldrd_strd
21633 && !optimize_function_for_size_p (cfun))
21634 {
21635 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21636 if (TARGET_THUMB2)
21637 thumb2_emit_strd_push (live_regs_mask);
21638 else if (TARGET_ARM
21639 && !TARGET_APCS_FRAME
21640 && !IS_INTERRUPT (func_type))
21641 arm_emit_strd_push (live_regs_mask);
21642 else
21643 {
21644 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21645 RTX_FRAME_RELATED_P (insn) = 1;
21646 }
21647 }
21648 else
21649 {
21650 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21651 RTX_FRAME_RELATED_P (insn) = 1;
21652 }
21653 }
21654
21655 if (! IS_VOLATILE (func_type))
21656 saved_regs += arm_save_coproc_regs ();
21657
21658 if (frame_pointer_needed && TARGET_ARM)
21659 {
21660 /* Create the new frame pointer. */
21661 if (TARGET_APCS_FRAME)
21662 {
21663 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21664 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21665 RTX_FRAME_RELATED_P (insn) = 1;
21666 }
21667 else
21668 {
21669 insn = GEN_INT (saved_regs - (4 + fp_offset));
21670 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21671 stack_pointer_rtx, insn));
21672 RTX_FRAME_RELATED_P (insn) = 1;
21673 }
21674 }
21675
21676 size = offsets->outgoing_args - offsets->saved_args;
21677 if (flag_stack_usage_info)
21678 current_function_static_stack_size = size;
21679
21680 /* If this isn't an interrupt service routine and we have a frame, then do
21681 stack checking. We use IP as the first scratch register, except for the
21682 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21683 if (!IS_INTERRUPT (func_type)
21684 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21685 || flag_stack_clash_protection))
21686 {
21687 unsigned int regno;
21688
21689 if (!IS_NESTED (func_type) || clobber_ip)
21690 regno = IP_REGNUM;
21691 else if (df_regs_ever_live_p (LR_REGNUM))
21692 regno = LR_REGNUM;
21693 else
21694 regno = 3;
21695
21696 if (crtl->is_leaf && !cfun->calls_alloca)
21697 {
21698 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21699 arm_emit_probe_stack_range (get_stack_check_protect (),
21700 size - get_stack_check_protect (),
21701 regno, live_regs_mask);
21702 }
21703 else if (size > 0)
21704 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21705 regno, live_regs_mask);
21706 }
21707
21708 /* Recover the static chain register. */
21709 if (clobber_ip)
21710 {
21711 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21712 insn = gen_rtx_REG (SImode, 3);
21713 else
21714 {
21715 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21716 insn = gen_frame_mem (SImode, insn);
21717 }
21718 emit_set_insn (ip_rtx, insn);
21719 emit_insn (gen_force_register_use (ip_rtx));
21720 }
21721
21722 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21723 {
21724 /* This add can produce multiple insns for a large constant, so we
21725 need to get tricky. */
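   /* For example (illustrative), an adjustment of -4100 is not a valid ARM
      immediate, so the add below may expand to something like
      "sub sp, sp, #4096" followed by "sub sp, sp, #4"; the loop that
      follows marks every insn of that expansion as frame-related.  */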
21726 rtx_insn *last = get_last_insn ();
21727
21728 amount = GEN_INT (offsets->saved_args + saved_regs
21729 - offsets->outgoing_args);
21730
21731 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21732 amount));
21733 do
21734 {
21735 last = last ? NEXT_INSN (last) : get_insns ();
21736 RTX_FRAME_RELATED_P (last) = 1;
21737 }
21738 while (last != insn);
21739
21740 /* If the frame pointer is needed, emit a special barrier that
21741 will prevent the scheduler from moving stores to the frame
21742 before the stack adjustment. */
21743 if (frame_pointer_needed)
21744 emit_insn (gen_stack_tie (stack_pointer_rtx,
21745 hard_frame_pointer_rtx));
21746 }
21747
21748
21749 if (frame_pointer_needed && TARGET_THUMB2)
21750 thumb_set_frame_pointer (offsets);
21751
21752 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21753 {
21754 unsigned long mask;
21755
21756 mask = live_regs_mask;
21757 mask &= THUMB2_WORK_REGS;
21758 if (!IS_NESTED (func_type))
21759 mask |= (1 << IP_REGNUM);
21760 arm_load_pic_register (mask);
21761 }
21762
21763 /* If we are profiling, make sure no instructions are scheduled before
21764 the call to mcount. Similarly if the user has requested no
21765 scheduling in the prolog. Similarly if we want non-call exceptions
21766 using the EABI unwinder, to prevent faulting instructions from being
21767 swapped with a stack adjustment. */
21768 if (crtl->profile || !TARGET_SCHED_PROLOG
21769 || (arm_except_unwind_info (&global_options) == UI_TARGET
21770 && cfun->can_throw_non_call_exceptions))
21771 emit_insn (gen_blockage ());
21772
21773 /* If the link register is being kept alive, with the return address in it,
21774 then make sure that it does not get reused by the ce2 pass. */
21775 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21776 cfun->machine->lr_save_eliminated = 1;
21777 }
21778 \f
21779 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21780 static void
21781 arm_print_condition (FILE *stream)
21782 {
21783 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21784 {
21785 /* Branch conversion is not implemented for Thumb-2. */
21786 if (TARGET_THUMB)
21787 {
21788 output_operand_lossage ("predicated Thumb instruction");
21789 return;
21790 }
21791 if (current_insn_predicate != NULL)
21792 {
21793 output_operand_lossage
21794 ("predicated instruction in conditional sequence");
21795 return;
21796 }
21797
21798 fputs (arm_condition_codes[arm_current_cc], stream);
21799 }
21800 else if (current_insn_predicate)
21801 {
21802 enum arm_cond_code code;
21803
21804 if (TARGET_THUMB1)
21805 {
21806 output_operand_lossage ("predicated Thumb instruction");
21807 return;
21808 }
21809
21810 code = get_arm_condition_code (current_insn_predicate);
21811 fputs (arm_condition_codes[code], stream);
21812 }
21813 }
21814
21815
21816 /* Globally reserved letters: acln
21817 Punctuation letters currently used: @_|?().!#
21818 Lower case letters currently used: bcdefhimpqtvwxyz
21819 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21820 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21821
21822 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21823
21824 If CODE is 'd', then the X is a condition operand and the instruction
21825 should only be executed if the condition is true.
21826 if CODE is 'D', then the X is a condition operand and the instruction
21827 should only be executed if the condition is false: however, if the mode
21828 of the comparison is CCFPEmode, then always execute the instruction -- we
21829 do this because in these circumstances !GE does not necessarily imply LT;
21830 in these cases the instruction pattern will take care to make sure that
21831 an instruction containing %d will follow, thereby undoing the effects of
21832 doing this instruction unconditionally.
21833 If CODE is 'N' then X is a floating point operand that must be negated
21834 before output.
21835 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21836 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
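/* Illustrative examples (assuming operands of the kinds noted above):
   with X = (const_int 5), '%B' prints -6 (the bitwise inverse, sign
   extended); with X a DImode REG in r4, '%M' prints "{r4-r5}".  */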
21837 static void
21838 arm_print_operand (FILE *stream, rtx x, int code)
21839 {
21840 switch (code)
21841 {
21842 case '@':
21843 fputs (ASM_COMMENT_START, stream);
21844 return;
21845
21846 case '_':
21847 fputs (user_label_prefix, stream);
21848 return;
21849
21850 case '|':
21851 fputs (REGISTER_PREFIX, stream);
21852 return;
21853
21854 case '?':
21855 arm_print_condition (stream);
21856 return;
21857
21858 case '.':
21859 /* The current condition code for a condition code setting instruction.
21860 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21861 fputc ('s', stream);
21862 arm_print_condition (stream);
21863 return;
21864
21865 case '!':
21866 /* If the instruction is conditionally executed then print
21867 the current condition code, otherwise print 's'. */
21868 gcc_assert (TARGET_THUMB2);
21869 if (current_insn_predicate)
21870 arm_print_condition (stream);
21871 else
21872 fputc ('s', stream);
21873 break;
21874
21875 /* %# is a "break" sequence. It doesn't output anything, but is used to
21876 separate e.g. operand numbers from following text, if that text consists
21877 of further digits which we don't want to be part of the operand
21878 number. */
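  /* For instance (illustrative), a template fragment written as "%0%#1"
     prints operand 0 immediately followed by the literal digit 1, whereas
     "%01" would be parsed as a reference to operand 1.  */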
21879 case '#':
21880 return;
21881
21882 case 'N':
21883 {
21884 REAL_VALUE_TYPE r;
21885 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21886 fprintf (stream, "%s", fp_const_from_val (&r));
21887 }
21888 return;
21889
21890 /* An integer or symbol address without a preceding # sign. */
21891 case 'c':
21892 switch (GET_CODE (x))
21893 {
21894 case CONST_INT:
21895 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21896 break;
21897
21898 case SYMBOL_REF:
21899 output_addr_const (stream, x);
21900 break;
21901
21902 case CONST:
21903 if (GET_CODE (XEXP (x, 0)) == PLUS
21904 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21905 {
21906 output_addr_const (stream, x);
21907 break;
21908 }
21909 /* Fall through. */
21910
21911 default:
21912 output_operand_lossage ("Unsupported operand for code '%c'", code);
21913 }
21914 return;
21915
21916 /* An integer that we want to print in HEX. */
21917 case 'x':
21918 switch (GET_CODE (x))
21919 {
21920 case CONST_INT:
21921 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21922 break;
21923
21924 default:
21925 output_operand_lossage ("Unsupported operand for code '%c'", code);
21926 }
21927 return;
21928
21929 case 'B':
21930 if (CONST_INT_P (x))
21931 {
21932 HOST_WIDE_INT val;
21933 val = ARM_SIGN_EXTEND (~INTVAL (x));
21934 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21935 }
21936 else
21937 {
21938 putc ('~', stream);
21939 output_addr_const (stream, x);
21940 }
21941 return;
21942
21943 case 'b':
21944 /* Print the log2 of a CONST_INT. */
21945 {
21946 HOST_WIDE_INT val;
21947
21948 if (!CONST_INT_P (x)
21949 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21950 output_operand_lossage ("Unsupported operand for code '%c'", code);
21951 else
21952 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21953 }
21954 return;
21955
21956 case 'L':
21957 /* The low 16 bits of an immediate constant. */
21958 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21959 return;
21960
21961 case 'i':
21962 fprintf (stream, "%s", arithmetic_instr (x, 1));
21963 return;
21964
21965 case 'I':
21966 fprintf (stream, "%s", arithmetic_instr (x, 0));
21967 return;
21968
21969 case 'S':
21970 {
21971 HOST_WIDE_INT val;
21972 const char *shift;
21973
21974 shift = shift_op (x, &val);
21975
21976 if (shift)
21977 {
21978 fprintf (stream, ", %s ", shift);
21979 if (val == -1)
21980 arm_print_operand (stream, XEXP (x, 1), 0);
21981 else
21982 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21983 }
21984 }
21985 return;
21986
21987 /* An explanation of the 'Q', 'R' and 'H' register operands:
21988
21989 In a pair of registers containing a DI or DF value the 'Q'
21990 operand returns the register number of the register containing
21991 the least significant part of the value. The 'R' operand returns
21992 the register number of the register containing the most
21993 significant part of the value.
21994
21995 The 'H' operand returns the higher of the two register numbers.
21996 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21997 same as the 'Q' operand, since the most significant part of the
21998 value is held in the lower number register. The reverse is true
21999 on systems where WORDS_BIG_ENDIAN is false.
22000
22001 The purpose of these operands is to distinguish between cases
22002 where the endian-ness of the values is important (for example
22003 when they are added together), and cases where the endian-ness
22004 is irrelevant, but the order of register operations is important.
22005 For example when loading a value from memory into a register
22006 pair, the endian-ness does not matter. Provided that the value
22007 from the lower memory address is put into the lower numbered
22008 register, and the value from the higher address is put into the
22009 higher numbered register, the load will work regardless of whether
22010 the value being loaded is big-wordian or little-wordian. The
22011 order of the two register loads can matter however, if the address
22012 of the memory location is actually held in one of the registers
22013 being overwritten by the load.
22014
22015 The 'Q' and 'R' constraints are also available for 64-bit
22016 constants. */
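    /* Illustrative example: for a DImode value held in {r0, r1} on a
       little-endian target (WORDS_BIG_ENDIAN false), '%Q' prints r0 (the
       least significant half), '%R' prints r1, and '%H' also prints r1;
       with WORDS_BIG_ENDIAN true, '%Q' and '%R' swap while '%H' is
       unchanged.  */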
22017 case 'Q':
22018 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22019 {
22020 rtx part = gen_lowpart (SImode, x);
22021 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22022 return;
22023 }
22024
22025 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22026 {
22027 output_operand_lossage ("invalid operand for code '%c'", code);
22028 return;
22029 }
22030
22031 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22032 return;
22033
22034 case 'R':
22035 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22036 {
22037 machine_mode mode = GET_MODE (x);
22038 rtx part;
22039
22040 if (mode == VOIDmode)
22041 mode = DImode;
22042 part = gen_highpart_mode (SImode, mode, x);
22043 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22044 return;
22045 }
22046
22047 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22048 {
22049 output_operand_lossage ("invalid operand for code '%c'", code);
22050 return;
22051 }
22052
22053 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22054 return;
22055
22056 case 'H':
22057 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22058 {
22059 output_operand_lossage ("invalid operand for code '%c'", code);
22060 return;
22061 }
22062
22063 asm_fprintf (stream, "%r", REGNO (x) + 1);
22064 return;
22065
22066 case 'J':
22067 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22068 {
22069 output_operand_lossage ("invalid operand for code '%c'", code);
22070 return;
22071 }
22072
22073 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22074 return;
22075
22076 case 'K':
22077 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22078 {
22079 output_operand_lossage ("invalid operand for code '%c'", code);
22080 return;
22081 }
22082
22083 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22084 return;
22085
22086 case 'm':
22087 asm_fprintf (stream, "%r",
22088 REG_P (XEXP (x, 0))
22089 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22090 return;
22091
22092 case 'M':
22093 asm_fprintf (stream, "{%r-%r}",
22094 REGNO (x),
22095 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22096 return;
22097
22098 /* Like 'M', but writing doubleword vector registers, for use by Neon
22099 insns. */
22100 case 'h':
22101 {
22102 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22103 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22104 if (numregs == 1)
22105 asm_fprintf (stream, "{d%d}", regno);
22106 else
22107 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22108 }
22109 return;
22110
22111 case 'd':
22112 /* CONST_TRUE_RTX means always -- that's the default. */
22113 if (x == const_true_rtx)
22114 return;
22115
22116 if (!COMPARISON_P (x))
22117 {
22118 output_operand_lossage ("invalid operand for code '%c'", code);
22119 return;
22120 }
22121
22122 fputs (arm_condition_codes[get_arm_condition_code (x)],
22123 stream);
22124 return;
22125
22126 case 'D':
22127 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22128 want to do that. */
22129 if (x == const_true_rtx)
22130 {
22131 output_operand_lossage ("instruction never executed");
22132 return;
22133 }
22134 if (!COMPARISON_P (x))
22135 {
22136 output_operand_lossage ("invalid operand for code '%c'", code);
22137 return;
22138 }
22139
22140 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22141 (get_arm_condition_code (x))],
22142 stream);
22143 return;
22144
22145 case 's':
22146 case 'V':
22147 case 'W':
22148 case 'X':
22149 case 'Y':
22150 case 'Z':
22151 /* Former Maverick support, removed after GCC-4.7. */
22152 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22153 return;
22154
22155 case 'U':
22156 if (!REG_P (x)
22157 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22158 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22159 /* Bad value for wCG register number. */
22160 {
22161 output_operand_lossage ("invalid operand for code '%c'", code);
22162 return;
22163 }
22164
22165 else
22166 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22167 return;
22168
22169 /* Print an iWMMXt control register name. */
22170 case 'w':
22171 if (!CONST_INT_P (x)
22172 || INTVAL (x) < 0
22173 || INTVAL (x) >= 16)
22174 /* Bad value for wC register number. */
22175 {
22176 output_operand_lossage ("invalid operand for code '%c'", code);
22177 return;
22178 }
22179
22180 else
22181 {
22182 static const char * wc_reg_names [16] =
22183 {
22184 "wCID", "wCon", "wCSSF", "wCASF",
22185 "wC4", "wC5", "wC6", "wC7",
22186 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22187 "wC12", "wC13", "wC14", "wC15"
22188 };
22189
22190 fputs (wc_reg_names [INTVAL (x)], stream);
22191 }
22192 return;
22193
22194 /* Print the high single-precision register of a VFP double-precision
22195 register. */
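    /* Illustrative example: for a DFmode value in d5 (occupying s10/s11),
       '%p' prints "s11".  */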
22196 case 'p':
22197 {
22198 machine_mode mode = GET_MODE (x);
22199 int regno;
22200
22201 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22202 {
22203 output_operand_lossage ("invalid operand for code '%c'", code);
22204 return;
22205 }
22206
22207 regno = REGNO (x);
22208 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22209 {
22210 output_operand_lossage ("invalid operand for code '%c'", code);
22211 return;
22212 }
22213
22214 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22215 }
22216 return;
22217
22218 /* Print a VFP/Neon double precision or quad precision register name. */
22219 case 'P':
22220 case 'q':
22221 {
22222 machine_mode mode = GET_MODE (x);
22223 int is_quad = (code == 'q');
22224 int regno;
22225
22226 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22227 {
22228 output_operand_lossage ("invalid operand for code '%c'", code);
22229 return;
22230 }
22231
22232 if (!REG_P (x)
22233 || !IS_VFP_REGNUM (REGNO (x)))
22234 {
22235 output_operand_lossage ("invalid operand for code '%c'", code);
22236 return;
22237 }
22238
22239 regno = REGNO (x);
22240 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22241 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22242 {
22243 output_operand_lossage ("invalid operand for code '%c'", code);
22244 return;
22245 }
22246
22247 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22248 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22249 }
22250 return;
22251
22252 /* These two codes print the low/high doubleword register of a Neon quad
22253 register, respectively. For pair-structure types, can also print
22254 low/high quadword registers. */
22255 case 'e':
22256 case 'f':
22257 {
22258 machine_mode mode = GET_MODE (x);
22259 int regno;
22260
22261 if ((GET_MODE_SIZE (mode) != 16
22262 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22263 {
22264 output_operand_lossage ("invalid operand for code '%c'", code);
22265 return;
22266 }
22267
22268 regno = REGNO (x);
22269 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22270 {
22271 output_operand_lossage ("invalid operand for code '%c'", code);
22272 return;
22273 }
22274
22275 if (GET_MODE_SIZE (mode) == 16)
22276 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22277 + (code == 'f' ? 1 : 0));
22278 else
22279 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22280 + (code == 'f' ? 1 : 0));
22281 }
22282 return;
22283
22284 /* Print a VFPv3 floating-point constant, represented as an integer
22285 index. */
22286 case 'G':
22287 {
22288 int index = vfp3_const_double_index (x);
22289 gcc_assert (index != -1);
22290 fprintf (stream, "%d", index);
22291 }
22292 return;
22293
22294 /* Print bits representing opcode features for Neon.
22295
22296 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22297 and polynomials as unsigned.
22298
22299 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22300
22301 Bit 2 is 1 for rounding functions, 0 otherwise. */
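  /* Illustrative example: bits == 5 (binary 101) denotes a signed,
     non-float, rounding operation, so '%T' prints 's', '%F' prints 'i'
     and '%O' prints 'r'.  */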
22302
22303 /* Identify the type as 's', 'u', 'p' or 'f'. */
22304 case 'T':
22305 {
22306 HOST_WIDE_INT bits = INTVAL (x);
22307 fputc ("uspf"[bits & 3], stream);
22308 }
22309 return;
22310
22311 /* Likewise, but signed and unsigned integers are both 'i'. */
22312 case 'F':
22313 {
22314 HOST_WIDE_INT bits = INTVAL (x);
22315 fputc ("iipf"[bits & 3], stream);
22316 }
22317 return;
22318
22319 /* As for 'T', but emit 'u' instead of 'p'. */
22320 case 't':
22321 {
22322 HOST_WIDE_INT bits = INTVAL (x);
22323 fputc ("usuf"[bits & 3], stream);
22324 }
22325 return;
22326
22327 /* Bit 2: rounding (vs none). */
22328 case 'O':
22329 {
22330 HOST_WIDE_INT bits = INTVAL (x);
22331 fputs ((bits & 4) != 0 ? "r" : "", stream);
22332 }
22333 return;
22334
22335 /* Memory operand for vld1/vst1 instruction. */
22336 case 'A':
22337 {
22338 rtx addr;
22339 bool postinc = FALSE;
22340 rtx postinc_reg = NULL;
22341 unsigned align, memsize, align_bits;
22342
22343 gcc_assert (MEM_P (x));
22344 addr = XEXP (x, 0);
22345 if (GET_CODE (addr) == POST_INC)
22346 {
22347 postinc = 1;
22348 addr = XEXP (addr, 0);
22349 }
22350 if (GET_CODE (addr) == POST_MODIFY)
22351 {
22352 postinc_reg = XEXP (XEXP (addr, 1), 1);
22353 addr = XEXP (addr, 0);
22354 }
22355 asm_fprintf (stream, "[%r", REGNO (addr));
22356
22357 /* We know the alignment of this access, so we can emit a hint in the
22358 instruction (for some alignments) as an aid to the memory subsystem
22359 of the target. */
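	/* Illustrative examples: a 16-byte access known to be 16-byte
	   aligned is printed as e.g. "[r0:128]"; an 8-byte access aligned
	   to 8 bytes gets ":64"; anything less aligned gets no hint.  */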
22360 align = MEM_ALIGN (x) >> 3;
22361 memsize = MEM_SIZE (x);
22362
22363 /* Only certain alignment specifiers are supported by the hardware. */
22364 if (memsize == 32 && (align % 32) == 0)
22365 align_bits = 256;
22366 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22367 align_bits = 128;
22368 else if (memsize >= 8 && (align % 8) == 0)
22369 align_bits = 64;
22370 else
22371 align_bits = 0;
22372
22373 if (align_bits != 0)
22374 asm_fprintf (stream, ":%d", align_bits);
22375
22376 asm_fprintf (stream, "]");
22377
22378 if (postinc)
22379 fputs ("!", stream);
22380 if (postinc_reg)
22381 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22382 }
22383 return;
22384
22385 case 'C':
22386 {
22387 rtx addr;
22388
22389 gcc_assert (MEM_P (x));
22390 addr = XEXP (x, 0);
22391 gcc_assert (REG_P (addr));
22392 asm_fprintf (stream, "[%r]", REGNO (addr));
22393 }
22394 return;
22395
22396 /* Translate an S register number into a D register number and element index. */
22397 case 'y':
22398 {
22399 machine_mode mode = GET_MODE (x);
22400 int regno;
22401
22402 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22403 {
22404 output_operand_lossage ("invalid operand for code '%c'", code);
22405 return;
22406 }
22407
22408 regno = REGNO (x);
22409 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22410 {
22411 output_operand_lossage ("invalid operand for code '%c'", code);
22412 return;
22413 }
22414
22415 regno = regno - FIRST_VFP_REGNUM;
22416 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22417 }
22418 return;
22419
22420 case 'v':
22421 gcc_assert (CONST_DOUBLE_P (x));
22422 int result;
22423 result = vfp3_const_double_for_fract_bits (x);
22424 if (result == 0)
22425 result = vfp3_const_double_for_bits (x);
22426 fprintf (stream, "#%d", result);
22427 return;
22428
22429 /* Register specifier for vld1.16/vst1.16. Translate the S register
22430 number into a D register number and element index. */
22431 case 'z':
22432 {
22433 machine_mode mode = GET_MODE (x);
22434 int regno;
22435
22436 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22437 {
22438 output_operand_lossage ("invalid operand for code '%c'", code);
22439 return;
22440 }
22441
22442 regno = REGNO (x);
22443 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22444 {
22445 output_operand_lossage ("invalid operand for code '%c'", code);
22446 return;
22447 }
22448
22449 regno = regno - FIRST_VFP_REGNUM;
22450 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22451 }
22452 return;
22453
22454 default:
22455 if (x == 0)
22456 {
22457 output_operand_lossage ("missing operand");
22458 return;
22459 }
22460
22461 switch (GET_CODE (x))
22462 {
22463 case REG:
22464 asm_fprintf (stream, "%r", REGNO (x));
22465 break;
22466
22467 case MEM:
22468 output_address (GET_MODE (x), XEXP (x, 0));
22469 break;
22470
22471 case CONST_DOUBLE:
22472 {
22473 char fpstr[20];
22474 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22475 sizeof (fpstr), 0, 1);
22476 fprintf (stream, "#%s", fpstr);
22477 }
22478 break;
22479
22480 default:
22481 gcc_assert (GET_CODE (x) != NEG);
22482 fputc ('#', stream);
22483 if (GET_CODE (x) == HIGH)
22484 {
22485 fputs (":lower16:", stream);
22486 x = XEXP (x, 0);
22487 }
22488
22489 output_addr_const (stream, x);
22490 break;
22491 }
22492 }
22493 }
22494 \f
22495 /* Target hook for printing a memory address. */
22496 static void
22497 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22498 {
22499 if (TARGET_32BIT)
22500 {
22501 int is_minus = GET_CODE (x) == MINUS;
22502
22503 if (REG_P (x))
22504 asm_fprintf (stream, "[%r]", REGNO (x));
22505 else if (GET_CODE (x) == PLUS || is_minus)
22506 {
22507 rtx base = XEXP (x, 0);
22508 rtx index = XEXP (x, 1);
22509 HOST_WIDE_INT offset = 0;
22510 if (!REG_P (base)
22511 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22512 {
22513 /* Ensure that BASE is a register. */
22514 /* (one of them must be). */
22515 /* Also ensure the SP is not used as an index register. */
22516 std::swap (base, index);
22517 }
22518 switch (GET_CODE (index))
22519 {
22520 case CONST_INT:
22521 offset = INTVAL (index);
22522 if (is_minus)
22523 offset = -offset;
22524 asm_fprintf (stream, "[%r, #%wd]",
22525 REGNO (base), offset);
22526 break;
22527
22528 case REG:
22529 asm_fprintf (stream, "[%r, %s%r]",
22530 REGNO (base), is_minus ? "-" : "",
22531 REGNO (index));
22532 break;
22533
22534 case MULT:
22535 case ASHIFTRT:
22536 case LSHIFTRT:
22537 case ASHIFT:
22538 case ROTATERT:
22539 {
22540 asm_fprintf (stream, "[%r, %s%r",
22541 REGNO (base), is_minus ? "-" : "",
22542 REGNO (XEXP (index, 0)));
22543 arm_print_operand (stream, index, 'S');
22544 fputs ("]", stream);
22545 break;
22546 }
22547
22548 default:
22549 gcc_unreachable ();
22550 }
22551 }
22552 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22553 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22554 {
22555 gcc_assert (REG_P (XEXP (x, 0)));
22556
22557 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22558 asm_fprintf (stream, "[%r, #%s%d]!",
22559 REGNO (XEXP (x, 0)),
22560 GET_CODE (x) == PRE_DEC ? "-" : "",
22561 GET_MODE_SIZE (mode));
22562 else
22563 asm_fprintf (stream, "[%r], #%s%d",
22564 REGNO (XEXP (x, 0)),
22565 GET_CODE (x) == POST_DEC ? "-" : "",
22566 GET_MODE_SIZE (mode));
22567 }
22568 else if (GET_CODE (x) == PRE_MODIFY)
22569 {
22570 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22571 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22572 asm_fprintf (stream, "#%wd]!",
22573 INTVAL (XEXP (XEXP (x, 1), 1)));
22574 else
22575 asm_fprintf (stream, "%r]!",
22576 REGNO (XEXP (XEXP (x, 1), 1)));
22577 }
22578 else if (GET_CODE (x) == POST_MODIFY)
22579 {
22580 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22581 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22582 asm_fprintf (stream, "#%wd",
22583 INTVAL (XEXP (XEXP (x, 1), 1)));
22584 else
22585 asm_fprintf (stream, "%r",
22586 REGNO (XEXP (XEXP (x, 1), 1)));
22587 }
22588 else output_addr_const (stream, x);
22589 }
22590 else
22591 {
22592 if (REG_P (x))
22593 asm_fprintf (stream, "[%r]", REGNO (x));
22594 else if (GET_CODE (x) == POST_INC)
22595 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22596 else if (GET_CODE (x) == PLUS)
22597 {
22598 gcc_assert (REG_P (XEXP (x, 0)));
22599 if (CONST_INT_P (XEXP (x, 1)))
22600 asm_fprintf (stream, "[%r, #%wd]",
22601 REGNO (XEXP (x, 0)),
22602 INTVAL (XEXP (x, 1)));
22603 else
22604 asm_fprintf (stream, "[%r, %r]",
22605 REGNO (XEXP (x, 0)),
22606 REGNO (XEXP (x, 1)));
22607 }
22608 else
22609 output_addr_const (stream, x);
22610 }
22611 }
22612 \f
22613 /* Target hook for indicating whether a punctuation character for
22614 TARGET_PRINT_OPERAND is valid. */
22615 static bool
22616 arm_print_operand_punct_valid_p (unsigned char code)
22617 {
22618 return (code == '@' || code == '|' || code == '.'
22619 || code == '(' || code == ')' || code == '#'
22620 || (TARGET_32BIT && (code == '?'))
22621 || (TARGET_THUMB2 && (code == '!'))
22622 || (TARGET_THUMB && (code == '_')));
22623 }
22624 \f
22625 /* Target hook for assembling integer objects. The ARM version needs to
22626 handle word-sized values specially. */
22627 static bool
22628 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22629 {
22630 machine_mode mode;
22631
22632 if (size == UNITS_PER_WORD && aligned_p)
22633 {
22634 fputs ("\t.word\t", asm_out_file);
22635 output_addr_const (asm_out_file, x);
22636
22637 /* Mark symbols as position independent. We only do this in the
22638 .text segment, not in the .data segment. */
22639 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22640 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22641 {
22642 /* See legitimize_pic_address for an explanation of the
22643 TARGET_VXWORKS_RTP check. */
22644 /* References to weak symbols cannot be resolved locally:
22645 they may be overridden by a non-weak definition at link
22646 time. */
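	  /* Illustrative example: this emits e.g. ".word foo(GOT)" for a
	     symbol that may be resolved outside this module, and
	     ".word bar(GOTOFF)" for a local, non-weak symbol when PIC data
	     is text-relative.  */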
22647 if (!arm_pic_data_is_text_relative
22648 || (GET_CODE (x) == SYMBOL_REF
22649 && (!SYMBOL_REF_LOCAL_P (x)
22650 || (SYMBOL_REF_DECL (x)
22651 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22652 fputs ("(GOT)", asm_out_file);
22653 else
22654 fputs ("(GOTOFF)", asm_out_file);
22655 }
22656 fputc ('\n', asm_out_file);
22657 return true;
22658 }
22659
22660 mode = GET_MODE (x);
22661
22662 if (arm_vector_mode_supported_p (mode))
22663 {
22664 int i, units;
22665
22666 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22667
22668 units = CONST_VECTOR_NUNITS (x);
22669 size = GET_MODE_UNIT_SIZE (mode);
22670
22671 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22672 for (i = 0; i < units; i++)
22673 {
22674 rtx elt = CONST_VECTOR_ELT (x, i);
22675 assemble_integer
22676 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22677 }
22678 else
22679 for (i = 0; i < units; i++)
22680 {
22681 rtx elt = CONST_VECTOR_ELT (x, i);
22682 assemble_real
22683 (*CONST_DOUBLE_REAL_VALUE (elt),
22684 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22685 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22686 }
22687
22688 return true;
22689 }
22690
22691 return default_assemble_integer (x, size, aligned_p);
22692 }
22693
22694 static void
22695 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22696 {
22697 section *s;
22698
22699 if (!TARGET_AAPCS_BASED)
22700 {
22701 (is_ctor ?
22702 default_named_section_asm_out_constructor
22703 : default_named_section_asm_out_destructor) (symbol, priority);
22704 return;
22705 }
22706
22707 /* Put these in the .init_array section, using a special relocation. */
22708 if (priority != DEFAULT_INIT_PRIORITY)
22709 {
22710 char buf[18];
22711 sprintf (buf, "%s.%.5u",
22712 is_ctor ? ".init_array" : ".fini_array",
22713 priority);
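      /* Illustrative example: a constructor with priority 101 goes into a
	 section named ".init_array.00101" ("%.5u" zero-pads to five digits
	 so the sections can be sorted numerically at link time).  */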
22714 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22715 }
22716 else if (is_ctor)
22717 s = ctors_section;
22718 else
22719 s = dtors_section;
22720
22721 switch_to_section (s);
22722 assemble_align (POINTER_SIZE);
22723 fputs ("\t.word\t", asm_out_file);
22724 output_addr_const (asm_out_file, symbol);
22725 fputs ("(target1)\n", asm_out_file);
22726 }
22727
22728 /* Add a function to the list of static constructors. */
22729
22730 static void
22731 arm_elf_asm_constructor (rtx symbol, int priority)
22732 {
22733 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22734 }
22735
22736 /* Add a function to the list of static destructors. */
22737
22738 static void
22739 arm_elf_asm_destructor (rtx symbol, int priority)
22740 {
22741 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22742 }
22743 \f
22744 /* A finite state machine takes care of noticing whether or not instructions
22745 can be conditionally executed, and thus decreases execution time and code
22746 size by deleting branch instructions. The fsm is controlled by
22747 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22748
22749 /* The states of the fsm controlling condition codes are:
22750 0: normal, do nothing special
22751 1: make ASM_OUTPUT_OPCODE not output this instruction
22752 2: make ASM_OUTPUT_OPCODE not output this instruction
22753 3: make instructions conditional
22754 4: make instructions conditional
22755
22756 State transitions (state->state by whom under condition):
22757 0 -> 1 final_prescan_insn if the `target' is a label
22758 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22759 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22760 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22761 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22762 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22763 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22764 (the target insn is arm_target_insn).
22765
22766 If the jump clobbers the conditions then we use states 2 and 4.
22767
22768 A similar thing can be done with conditional return insns.
22769
22770 XXX In case the `target' is an unconditional branch, this conditionalising
22771 of the instructions always reduces code size, but not always execution
22772 time. But then, I want to reduce the code size to somewhere near what
22773 /bin/cc produces. */
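/* Illustrative example of the effect in ARM state: a sequence such as

	cmp	r0, #0
	beq	.L1
	mov	r1, #1
   .L1:

   can be emitted instead as

	cmp	r0, #0
	movne	r1, #1

   when .L1 is not otherwise used and the skipped insns are eligible.  */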
22774
22775 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22776 instructions. When a COND_EXEC instruction is seen the subsequent
22777 instructions are scanned so that multiple conditional instructions can be
22778 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22779 specify the length and true/false mask for the IT block. These will be
22780 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
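/* Illustrative example: two adjacent COND_EXEC insns, the first predicated
   on EQ and the second on NE, give arm_condexec_masklen == 2 and
   arm_condexec_mask == 1 (binary 01), which thumb2_asm_output_opcode below
   prints as an "ite eq" prefix ahead of the two instructions.  */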
22781
22782 /* Returns the index of the ARM condition code string in
22783 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22784 COMPARISON should be an rtx like `(eq (...) (...))'. */
22785
22786 enum arm_cond_code
22787 maybe_get_arm_condition_code (rtx comparison)
22788 {
22789 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22790 enum arm_cond_code code;
22791 enum rtx_code comp_code = GET_CODE (comparison);
22792
22793 if (GET_MODE_CLASS (mode) != MODE_CC)
22794 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22795 XEXP (comparison, 1));
22796
22797 switch (mode)
22798 {
22799 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22800 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22801 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22802 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22803 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22804 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22805 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22806 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22807 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22808 case E_CC_DLTUmode: code = ARM_CC;
22809
22810 dominance:
22811 if (comp_code == EQ)
22812 return ARM_INVERSE_CONDITION_CODE (code);
22813 if (comp_code == NE)
22814 return code;
22815 return ARM_NV;
22816
22817 case E_CC_NOOVmode:
22818 switch (comp_code)
22819 {
22820 case NE: return ARM_NE;
22821 case EQ: return ARM_EQ;
22822 case GE: return ARM_PL;
22823 case LT: return ARM_MI;
22824 default: return ARM_NV;
22825 }
22826
22827 case E_CC_Zmode:
22828 switch (comp_code)
22829 {
22830 case NE: return ARM_NE;
22831 case EQ: return ARM_EQ;
22832 default: return ARM_NV;
22833 }
22834
22835 case E_CC_Nmode:
22836 switch (comp_code)
22837 {
22838 case NE: return ARM_MI;
22839 case EQ: return ARM_PL;
22840 default: return ARM_NV;
22841 }
22842
22843 case E_CCFPEmode:
22844 case E_CCFPmode:
22845 /* We can handle all cases except UNEQ and LTGT. */
22846 switch (comp_code)
22847 {
22848 case GE: return ARM_GE;
22849 case GT: return ARM_GT;
22850 case LE: return ARM_LS;
22851 case LT: return ARM_MI;
22852 case NE: return ARM_NE;
22853 case EQ: return ARM_EQ;
22854 case ORDERED: return ARM_VC;
22855 case UNORDERED: return ARM_VS;
22856 case UNLT: return ARM_LT;
22857 case UNLE: return ARM_LE;
22858 case UNGT: return ARM_HI;
22859 case UNGE: return ARM_PL;
22860 /* UNEQ and LTGT do not have a representation. */
22861 case UNEQ: /* Fall through. */
22862 case LTGT: /* Fall through. */
22863 default: return ARM_NV;
22864 }
22865
22866 case E_CC_SWPmode:
22867 switch (comp_code)
22868 {
22869 case NE: return ARM_NE;
22870 case EQ: return ARM_EQ;
22871 case GE: return ARM_LE;
22872 case GT: return ARM_LT;
22873 case LE: return ARM_GE;
22874 case LT: return ARM_GT;
22875 case GEU: return ARM_LS;
22876 case GTU: return ARM_CC;
22877 case LEU: return ARM_CS;
22878 case LTU: return ARM_HI;
22879 default: return ARM_NV;
22880 }
22881
22882 case E_CC_Cmode:
22883 switch (comp_code)
22884 {
22885 case LTU: return ARM_CS;
22886 case GEU: return ARM_CC;
22887 case NE: return ARM_CS;
22888 case EQ: return ARM_CC;
22889 default: return ARM_NV;
22890 }
22891
22892 case E_CC_CZmode:
22893 switch (comp_code)
22894 {
22895 case NE: return ARM_NE;
22896 case EQ: return ARM_EQ;
22897 case GEU: return ARM_CS;
22898 case GTU: return ARM_HI;
22899 case LEU: return ARM_LS;
22900 case LTU: return ARM_CC;
22901 default: return ARM_NV;
22902 }
22903
22904 case E_CC_NCVmode:
22905 switch (comp_code)
22906 {
22907 case GE: return ARM_GE;
22908 case LT: return ARM_LT;
22909 case GEU: return ARM_CS;
22910 case LTU: return ARM_CC;
22911 default: return ARM_NV;
22912 }
22913
22914 case E_CC_Vmode:
22915 switch (comp_code)
22916 {
22917 case NE: return ARM_VS;
22918 case EQ: return ARM_VC;
22919 default: return ARM_NV;
22920 }
22921
22922 case E_CCmode:
22923 switch (comp_code)
22924 {
22925 case NE: return ARM_NE;
22926 case EQ: return ARM_EQ;
22927 case GE: return ARM_GE;
22928 case GT: return ARM_GT;
22929 case LE: return ARM_LE;
22930 case LT: return ARM_LT;
22931 case GEU: return ARM_CS;
22932 case GTU: return ARM_HI;
22933 case LEU: return ARM_LS;
22934 case LTU: return ARM_CC;
22935 default: return ARM_NV;
22936 }
22937
22938 default: gcc_unreachable ();
22939 }
22940 }
22941
22942 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22943 static enum arm_cond_code
22944 get_arm_condition_code (rtx comparison)
22945 {
22946 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22947 gcc_assert (code != ARM_NV);
22948 return code;
22949 }
22950
22951 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22952 code registers when not targeting Thumb1. The VFP condition register
22953 only exists when generating hard-float code. */
22954 static bool
22955 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22956 {
22957 if (!TARGET_32BIT)
22958 return false;
22959
22960 *p1 = CC_REGNUM;
22961 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22962 return true;
22963 }
22964
22965 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22966 instructions. */
22967 void
22968 thumb2_final_prescan_insn (rtx_insn *insn)
22969 {
22970 rtx_insn *first_insn = insn;
22971 rtx body = PATTERN (insn);
22972 rtx predicate;
22973 enum arm_cond_code code;
22974 int n;
22975 int mask;
22976 int max;
22977
22978 /* max_insns_skipped in the tune was already taken into account in the
22979 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22980 just emit the IT blocks as best we can. It does not make sense to split
22981 the IT blocks. */
22982 max = MAX_INSN_PER_IT_BLOCK;
22983
22984 /* Remove the previous insn from the count of insns to be output. */
22985 if (arm_condexec_count)
22986 arm_condexec_count--;
22987
22988 /* Nothing to do if we are already inside a conditional block. */
22989 if (arm_condexec_count)
22990 return;
22991
22992 if (GET_CODE (body) != COND_EXEC)
22993 return;
22994
22995 /* Conditional jumps are implemented directly. */
22996 if (JUMP_P (insn))
22997 return;
22998
22999 predicate = COND_EXEC_TEST (body);
23000 arm_current_cc = get_arm_condition_code (predicate);
23001
23002 n = get_attr_ce_count (insn);
23003 arm_condexec_count = 1;
23004 arm_condexec_mask = (1 << n) - 1;
23005 arm_condexec_masklen = n;
23006 /* See if subsequent instructions can be combined into the same block. */
23007 for (;;)
23008 {
23009 insn = next_nonnote_insn (insn);
23010
23011 /* Jumping into the middle of an IT block is illegal, so a label or
23012 barrier terminates the block. */
23013 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23014 break;
23015
23016 body = PATTERN (insn);
23017 /* USE and CLOBBER aren't really insns, so just skip them. */
23018 if (GET_CODE (body) == USE
23019 || GET_CODE (body) == CLOBBER)
23020 continue;
23021
23022 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23023 if (GET_CODE (body) != COND_EXEC)
23024 break;
23025 /* Maximum number of conditionally executed instructions in a block. */
23026 n = get_attr_ce_count (insn);
23027 if (arm_condexec_masklen + n > max)
23028 break;
23029
23030 predicate = COND_EXEC_TEST (body);
23031 code = get_arm_condition_code (predicate);
23032 mask = (1 << n) - 1;
23033 if (arm_current_cc == code)
23034 arm_condexec_mask |= (mask << arm_condexec_masklen);
23035 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23036 break;
23037
23038 arm_condexec_count++;
23039 arm_condexec_masklen += n;
23040
23041 /* A jump must be the last instruction in a conditional block. */
23042 if (JUMP_P (insn))
23043 break;
23044 }
23045 /* Restore recog_data (getting the attributes of other insns can
23046 destroy this array, but final.c assumes that it remains intact
23047 across this call). */
23048 extract_constrain_insn_cached (first_insn);
23049 }
23050
23051 void
23052 arm_final_prescan_insn (rtx_insn *insn)
23053 {
23054 /* BODY will hold the body of INSN. */
23055 rtx body = PATTERN (insn);
23056
23057 /* This will be 1 if trying to repeat the trick, and things need to be
23058 reversed if it appears to fail. */
23059 int reverse = 0;
23060
23061 /* If we start with a return insn, we only succeed if we find another one. */
23062 int seeking_return = 0;
23063 enum rtx_code return_code = UNKNOWN;
23064
23065 /* START_INSN will hold the insn from where we start looking. This is the
23066 first insn after the following code_label if REVERSE is true. */
23067 rtx_insn *start_insn = insn;
23068
23069 /* If in state 4, check if the target branch is reached, in order to
23070 change back to state 0. */
23071 if (arm_ccfsm_state == 4)
23072 {
23073 if (insn == arm_target_insn)
23074 {
23075 arm_target_insn = NULL;
23076 arm_ccfsm_state = 0;
23077 }
23078 return;
23079 }
23080
23081 /* If in state 3, it is possible to repeat the trick, if this insn is an
23082 unconditional branch to a label, and immediately following this branch
23083 is the previous target label which is only used once, and the label this
23084 branch jumps to is not too far off. */
23085 if (arm_ccfsm_state == 3)
23086 {
23087 if (simplejump_p (insn))
23088 {
23089 start_insn = next_nonnote_insn (start_insn);
23090 if (BARRIER_P (start_insn))
23091 {
23092 /* XXX Isn't this always a barrier? */
23093 start_insn = next_nonnote_insn (start_insn);
23094 }
23095 if (LABEL_P (start_insn)
23096 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23097 && LABEL_NUSES (start_insn) == 1)
23098 reverse = TRUE;
23099 else
23100 return;
23101 }
23102 else if (ANY_RETURN_P (body))
23103 {
23104 start_insn = next_nonnote_insn (start_insn);
23105 if (BARRIER_P (start_insn))
23106 start_insn = next_nonnote_insn (start_insn);
23107 if (LABEL_P (start_insn)
23108 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23109 && LABEL_NUSES (start_insn) == 1)
23110 {
23111 reverse = TRUE;
23112 seeking_return = 1;
23113 return_code = GET_CODE (body);
23114 }
23115 else
23116 return;
23117 }
23118 else
23119 return;
23120 }
23121
23122 gcc_assert (!arm_ccfsm_state || reverse);
23123 if (!JUMP_P (insn))
23124 return;
23125
23126 /* This jump might be paralleled with a clobber of the condition codes;
23127 the jump should always come first. */
23128 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23129 body = XVECEXP (body, 0, 0);
23130
23131 if (reverse
23132 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23133 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23134 {
23135 int insns_skipped;
23136 int fail = FALSE, succeed = FALSE;
23137 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23138 int then_not_else = TRUE;
23139 rtx_insn *this_insn = start_insn;
23140 rtx label = 0;
23141
23142 /* Register the insn jumped to. */
23143 if (reverse)
23144 {
23145 if (!seeking_return)
23146 label = XEXP (SET_SRC (body), 0);
23147 }
23148 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23149 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23150 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23151 {
23152 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23153 then_not_else = FALSE;
23154 }
23155 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23156 {
23157 seeking_return = 1;
23158 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23159 }
23160 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23161 {
23162 seeking_return = 1;
23163 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23164 then_not_else = FALSE;
23165 }
23166 else
23167 gcc_unreachable ();
23168
23169 /* See how many insns this branch skips, and what kind of insns. If all
23170 insns are okay, and the label or unconditional branch to the same
23171 label is not too far away, succeed. */
23172 for (insns_skipped = 0;
23173 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23174 {
23175 rtx scanbody;
23176
23177 this_insn = next_nonnote_insn (this_insn);
23178 if (!this_insn)
23179 break;
23180
23181 switch (GET_CODE (this_insn))
23182 {
23183 case CODE_LABEL:
23184 /* Succeed if it is the target label, otherwise fail since
23185 control falls in from somewhere else. */
23186 if (this_insn == label)
23187 {
23188 arm_ccfsm_state = 1;
23189 succeed = TRUE;
23190 }
23191 else
23192 fail = TRUE;
23193 break;
23194
23195 case BARRIER:
23196 /* Succeed if the following insn is the target label.
23197 Otherwise fail.
23198 If return insns are used then the last insn in a function
23199 will be a barrier. */
23200 this_insn = next_nonnote_insn (this_insn);
23201 if (this_insn && this_insn == label)
23202 {
23203 arm_ccfsm_state = 1;
23204 succeed = TRUE;
23205 }
23206 else
23207 fail = TRUE;
23208 break;
23209
23210 case CALL_INSN:
23211 /* The AAPCS says that conditional calls should not be
23212 used since they make interworking inefficient (the
23213 linker can't transform BL<cond> into BLX). That's
23214 only a problem if the machine has BLX. */
23215 if (arm_arch5)
23216 {
23217 fail = TRUE;
23218 break;
23219 }
23220
23221 /* Succeed if the following insn is the target label, or
23222 if the following two insns are a barrier and the
23223 target label. */
23224 this_insn = next_nonnote_insn (this_insn);
23225 if (this_insn && BARRIER_P (this_insn))
23226 this_insn = next_nonnote_insn (this_insn);
23227
23228 if (this_insn && this_insn == label
23229 && insns_skipped < max_insns_skipped)
23230 {
23231 arm_ccfsm_state = 1;
23232 succeed = TRUE;
23233 }
23234 else
23235 fail = TRUE;
23236 break;
23237
23238 case JUMP_INSN:
23239 /* If this is an unconditional branch to the same label, succeed.
23240 If it is to another label, do nothing. If it is conditional,
23241 fail. */
23242 /* XXX Probably, the tests for SET and the PC are
23243 unnecessary. */
23244
23245 scanbody = PATTERN (this_insn);
23246 if (GET_CODE (scanbody) == SET
23247 && GET_CODE (SET_DEST (scanbody)) == PC)
23248 {
23249 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23250 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23251 {
23252 arm_ccfsm_state = 2;
23253 succeed = TRUE;
23254 }
23255 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23256 fail = TRUE;
23257 }
23258 /* Fail if a conditional return is undesirable (e.g. on a
23259 StrongARM), but still allow this if optimizing for size. */
23260 else if (GET_CODE (scanbody) == return_code
23261 && !use_return_insn (TRUE, NULL)
23262 && !optimize_size)
23263 fail = TRUE;
23264 else if (GET_CODE (scanbody) == return_code)
23265 {
23266 arm_ccfsm_state = 2;
23267 succeed = TRUE;
23268 }
23269 else if (GET_CODE (scanbody) == PARALLEL)
23270 {
23271 switch (get_attr_conds (this_insn))
23272 {
23273 case CONDS_NOCOND:
23274 break;
23275 default:
23276 fail = TRUE;
23277 break;
23278 }
23279 }
23280 else
23281 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23282
23283 break;
23284
23285 case INSN:
23286 /* Instructions using or affecting the condition codes make it
23287 fail. */
23288 scanbody = PATTERN (this_insn);
23289 if (!(GET_CODE (scanbody) == SET
23290 || GET_CODE (scanbody) == PARALLEL)
23291 || get_attr_conds (this_insn) != CONDS_NOCOND)
23292 fail = TRUE;
23293 break;
23294
23295 default:
23296 break;
23297 }
23298 }
23299 if (succeed)
23300 {
23301 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23302 arm_target_label = CODE_LABEL_NUMBER (label);
23303 else
23304 {
23305 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23306
23307 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23308 {
23309 this_insn = next_nonnote_insn (this_insn);
23310 gcc_assert (!this_insn
23311 || (!BARRIER_P (this_insn)
23312 && !LABEL_P (this_insn)));
23313 }
23314 if (!this_insn)
23315 {
23316 /* Oh dear! We ran off the end; give up. */
23317 extract_constrain_insn_cached (insn);
23318 arm_ccfsm_state = 0;
23319 arm_target_insn = NULL;
23320 return;
23321 }
23322 arm_target_insn = this_insn;
23323 }
23324
23325 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23326 what it was. */
23327 if (!reverse)
23328 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23329
23330 if (reverse || then_not_else)
23331 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23332 }
23333
23334 /* Restore recog_data (getting the attributes of other insns can
23335 destroy this array, but final.c assumes that it remains intact
23336 across this call). */
23337 extract_constrain_insn_cached (insn);
23338 }
23339 }
23340
23341 /* Output IT instructions. */
23342 void
23343 thumb2_asm_output_opcode (FILE * stream)
23344 {
23345 char buff[5];
23346 int n;
23347
23348 if (arm_condexec_mask)
23349 {
23350 for (n = 0; n < arm_condexec_masklen; n++)
23351 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23352 buff[n] = 0;
23353 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23354 arm_condition_codes[arm_current_cc]);
23355 arm_condexec_mask = 0;
23356 }
23357 }
23358
23359 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23360 UNITS_PER_WORD bytes wide. */
23361 static unsigned int
23362 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23363 {
23364 if (TARGET_32BIT
23365 && regno > PC_REGNUM
23366 && regno != FRAME_POINTER_REGNUM
23367 && regno != ARG_POINTER_REGNUM
23368 && !IS_VFP_REGNUM (regno))
23369 return 1;
23370
23371 return ARM_NUM_REGS (mode);
23372 }
23373
23374 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23375 static bool
23376 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23377 {
23378 if (GET_MODE_CLASS (mode) == MODE_CC)
23379 return (regno == CC_REGNUM
23380 || (TARGET_HARD_FLOAT
23381 && regno == VFPCC_REGNUM));
23382
23383 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23384 return false;
23385
23386 if (TARGET_THUMB1)
23387 /* For the Thumb we only allow values bigger than SImode in
23388 registers 0 - 6, so that there is always a second low
23389 register available to hold the upper part of the value.
23390 We probably ought to ensure that the register is the
23391 start of an even numbered register pair. */
23392 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23393
23394 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23395 {
23396 if (mode == SFmode || mode == SImode)
23397 return VFP_REGNO_OK_FOR_SINGLE (regno);
23398
23399 if (mode == DFmode)
23400 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23401
23402 if (mode == HFmode)
23403 return VFP_REGNO_OK_FOR_SINGLE (regno);
23404
23405 /* VFP registers can hold HImode values. */
23406 if (mode == HImode)
23407 return VFP_REGNO_OK_FOR_SINGLE (regno);
23408
23409 if (TARGET_NEON)
23410 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23411 || (VALID_NEON_QREG_MODE (mode)
23412 && NEON_REGNO_OK_FOR_QUAD (regno))
23413 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23414 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23415 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23416 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23417 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23418
23419 return false;
23420 }
23421
23422 if (TARGET_REALLY_IWMMXT)
23423 {
23424 if (IS_IWMMXT_GR_REGNUM (regno))
23425 return mode == SImode;
23426
23427 if (IS_IWMMXT_REGNUM (regno))
23428 return VALID_IWMMXT_REG_MODE (mode);
23429 }
23430
23431 /* We allow almost any value to be stored in the general registers.
23432 Restrict doubleword quantities to even register pairs in ARM state
23433 so that we can use ldrd. Do not allow very large Neon structure
23434 opaque modes in general registers; they would use too many. */
23435 if (regno <= LAST_ARM_REGNUM)
23436 {
23437 if (ARM_NUM_REGS (mode) > 4)
23438 return false;
23439
23440 if (TARGET_THUMB2)
23441 return true;
23442
23443 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23444 }
23445
23446 if (regno == FRAME_POINTER_REGNUM
23447 || regno == ARG_POINTER_REGNUM)
23448 /* We only allow integers in the fake hard registers. */
23449 return GET_MODE_CLASS (mode) == MODE_INT;
23450
23451 return false;
23452 }
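/* As a concrete example of the LDRD restriction above: with TARGET_LDRD a
   DImode value may live in {r0, r1} but not in {r1, r2}, so that it can be
   accessed with a single ldrd/strd. */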
23453
23454 /* Implement TARGET_MODES_TIEABLE_P. */
23455
23456 static bool
23457 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23458 {
23459 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23460 return true;
23461
23462 /* We specifically want to allow elements of "structure" modes to
23463 be tieable to the structure. This more general condition allows
23464 other rarer situations too. */
23465 if (TARGET_NEON
23466 && (VALID_NEON_DREG_MODE (mode1)
23467 || VALID_NEON_QREG_MODE (mode1)
23468 || VALID_NEON_STRUCT_MODE (mode1))
23469 && (VALID_NEON_DREG_MODE (mode2)
23470 || VALID_NEON_QREG_MODE (mode2)
23471 || VALID_NEON_STRUCT_MODE (mode2)))
23472 return true;
23473
23474 return false;
23475 }
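/* For example, V4SImode (a single Q register) and OImode (a four-register
   Neon structure mode) are in different mode classes, but the Neon clause
   above still allows values in those modes to be tied. */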
23476
23477 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
23478 not used in ARM mode. */
23479
23480 enum reg_class
23481 arm_regno_class (int regno)
23482 {
23483 if (regno == PC_REGNUM)
23484 return NO_REGS;
23485
23486 if (TARGET_THUMB1)
23487 {
23488 if (regno == STACK_POINTER_REGNUM)
23489 return STACK_REG;
23490 if (regno == CC_REGNUM)
23491 return CC_REG;
23492 if (regno < 8)
23493 return LO_REGS;
23494 return HI_REGS;
23495 }
23496
23497 if (TARGET_THUMB2 && regno < 8)
23498 return LO_REGS;
23499
23500 if ( regno <= LAST_ARM_REGNUM
23501 || regno == FRAME_POINTER_REGNUM
23502 || regno == ARG_POINTER_REGNUM)
23503 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23504
23505 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23506 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23507
23508 if (IS_VFP_REGNUM (regno))
23509 {
23510 if (regno <= D7_VFP_REGNUM)
23511 return VFP_D0_D7_REGS;
23512 else if (regno <= LAST_LO_VFP_REGNUM)
23513 return VFP_LO_REGS;
23514 else
23515 return VFP_HI_REGS;
23516 }
23517
23518 if (IS_IWMMXT_REGNUM (regno))
23519 return IWMMXT_REGS;
23520
23521 if (IS_IWMMXT_GR_REGNUM (regno))
23522 return IWMMXT_GR_REGS;
23523
23524 return NO_REGS;
23525 }
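/* E.g. in Thumb-2, r0-r7 map to LO_REGS and r8-r14 to HI_REGS, whereas in
   ARM state r0-r14 (plus the fake frame and argument pointers) are all
   GENERAL_REGS. */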
23526
23527 /* Handle a special case when computing the offset
23528 of an argument from the frame pointer. */
23529 int
23530 arm_debugger_arg_offset (int value, rtx addr)
23531 {
23532 rtx_insn *insn;
23533
23534 /* We are only interested if dbxout_parms() failed to compute the offset. */
23535 if (value != 0)
23536 return 0;
23537
23538 /* We can only cope with the case where the address is held in a register. */
23539 if (!REG_P (addr))
23540 return 0;
23541
23542 /* If we are using the frame pointer to point at the argument, then
23543 an offset of 0 is correct. */
23544 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23545 return 0;
23546
23547 /* If we are using the stack pointer to point at the
23548 argument, then an offset of 0 is correct. */
23549 /* ??? Check this is consistent with thumb2 frame layout. */
23550 if ((TARGET_THUMB || !frame_pointer_needed)
23551 && REGNO (addr) == SP_REGNUM)
23552 return 0;
23553
23554 /* Oh dear. The argument is pointed to by a register rather
23555 than being held in a register, or being stored at a known
23556 offset from the frame pointer. Since GDB only understands
23557 those two kinds of argument we must translate the address
23558 held in the register into an offset from the frame pointer.
23559 We do this by searching through the insns for the function
23560 looking to see where this register gets its value. If the
23561 register is initialized from the frame pointer plus an offset
23562 then we are in luck and we can continue, otherwise we give up.
23563
23564 This code is exercised by producing debugging information
23565 for a function with arguments like this:
23566
23567 double func (double a, double b, int c, double d) {return d;}
23568
23569 Without this code the stab for parameter 'd' will be set to
23570 an offset of 0 from the frame pointer, rather than 8. */
23571
23572 /* The if() statement says:
23573
23574 If the insn is a normal instruction
23575 and if the insn is setting the value in a register
23576 and if the register being set is the register holding the address of the argument
23577 and if the address is computed by an addition
23578 that involves adding to a register
23579 which is the frame pointer
23580 a constant integer
23581
23582 then... */
23583
23584 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23585 {
23586 if ( NONJUMP_INSN_P (insn)
23587 && GET_CODE (PATTERN (insn)) == SET
23588 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23589 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23590 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23591 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23592 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23593 )
23594 {
23595 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23596
23597 break;
23598 }
23599 }
23600
23601 if (value == 0)
23602 {
23603 debug_rtx (addr);
23604 warning (0, "unable to compute real location of stacked parameter");
23605 value = 8; /* XXX magic hack */
23606 }
23607
23608 return value;
23609 }
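/* In the example above, the matched insn has roughly the form
   (set (reg Rn) (plus (reg hard-fp) (const_int 8))), so 8 is returned as
   the offset of 'd' from the frame pointer. */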
23610 \f
23611 /* Implement TARGET_PROMOTED_TYPE. */
23612
23613 static tree
23614 arm_promoted_type (const_tree t)
23615 {
23616 if (SCALAR_FLOAT_TYPE_P (t)
23617 && TYPE_PRECISION (t) == 16
23618 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23619 return float_type_node;
23620 return NULL_TREE;
23621 }
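/* Rough consequence for users: given __fp16 operands a and b, a + b is
   evaluated as (float) a + (float) b and only narrowed back to __fp16 on
   assignment. */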
23622
23623 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23624 This simply adds HFmode as a supported mode; even though we don't
23625 implement arithmetic on this type directly, it's supported by
23626 optabs conversions, much the way the double-word arithmetic is
23627 special-cased in the default hook. */
23628
23629 static bool
23630 arm_scalar_mode_supported_p (scalar_mode mode)
23631 {
23632 if (mode == HFmode)
23633 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23634 else if (ALL_FIXED_POINT_MODE_P (mode))
23635 return true;
23636 else
23637 return default_scalar_mode_supported_p (mode);
23638 }
23639
23640 /* Set the value of FLT_EVAL_METHOD.
23641 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23642
23643 0: evaluate all operations and constants, whose semantic type has at
23644 most the range and precision of type float, to the range and
23645 precision of float; evaluate all other operations and constants to
23646 the range and precision of the semantic type;
23647
23648 N, where _FloatN is a supported interchange floating type:
23649 evaluate all operations and constants, whose semantic type has at
23650 most the range and precision of _FloatN type, to the range and
23651 precision of the _FloatN type; evaluate all other operations and
23652 constants to the range and precision of the semantic type;
23653
23654 If we have the ARMv8.2-A extensions then we support _Float16 in native
23655 precision, so we should set this to 16. Otherwise, we support the type,
23656 but want to evaluate expressions in float precision, so set this to
23657 0. */
23658
23659 static enum flt_eval_method
23660 arm_excess_precision (enum excess_precision_type type)
23661 {
23662 switch (type)
23663 {
23664 case EXCESS_PRECISION_TYPE_FAST:
23665 case EXCESS_PRECISION_TYPE_STANDARD:
23666 /* We can calculate either in 16-bit range and precision or
23667 32-bit range and precision. Make that decision based on whether
23668 we have native support for the ARMv8.2-A 16-bit floating-point
23669 instructions or not. */
23670 return (TARGET_VFP_FP16INST
23671 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23672 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23673 case EXCESS_PRECISION_TYPE_IMPLICIT:
23674 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23675 default:
23676 gcc_unreachable ();
23677 }
23678 return FLT_EVAL_METHOD_UNPREDICTABLE;
23679 }
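/* Sketch of the effect: with the ARMv8.2-A FP16 instructions available
   (TARGET_VFP_FP16INST), _Float16 arithmetic is evaluated directly in
   16-bit precision; otherwise it is evaluated in float and narrowed back
   when the result is stored. */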
23680
23681
23682 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23683 _Float16 if we are using anything other than ieee format for 16-bit
23684 floating point. Otherwise, punt to the default implementation. */
23685 static opt_scalar_float_mode
23686 arm_floatn_mode (int n, bool extended)
23687 {
23688 if (!extended && n == 16)
23689 {
23690 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23691 return HFmode;
23692 return opt_scalar_float_mode ();
23693 }
23694
23695 return default_floatn_mode (n, extended);
23696 }
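/* In other words, _Float16 is only made available when the 16-bit format is
   IEEE (-mfp16-format=ieee); with the alternative format only __fp16
   remains usable. */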
23697
23698
23699 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23700 not to early-clobber SRC registers in the process.
23701
23702 We assume that the operands described by SRC and DEST represent a
23703 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23704 number of components into which the copy has been decomposed. */
23705 void
23706 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23707 {
23708 unsigned int i;
23709
23710 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23711 || REGNO (operands[0]) < REGNO (operands[1]))
23712 {
23713 for (i = 0; i < count; i++)
23714 {
23715 operands[2 * i] = dest[i];
23716 operands[2 * i + 1] = src[i];
23717 }
23718 }
23719 else
23720 {
23721 for (i = 0; i < count; i++)
23722 {
23723 operands[2 * i] = dest[count - i - 1];
23724 operands[2 * i + 1] = src[count - i - 1];
23725 }
23726 }
23727 }
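/* Worked example: copying {d0, d1} into {d1, d2}. The destination overlaps
   the source and has the higher register number, so the operand pairs are
   listed in reverse order: the copy d2 <- d1 comes before d1 <- d0,
   avoiding the clobber. */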
23728
23729 /* Split operands into moves from op[1] + op[2] into op[0]. */
23730
23731 void
23732 neon_split_vcombine (rtx operands[3])
23733 {
23734 unsigned int dest = REGNO (operands[0]);
23735 unsigned int src1 = REGNO (operands[1]);
23736 unsigned int src2 = REGNO (operands[2]);
23737 machine_mode halfmode = GET_MODE (operands[1]);
23738 unsigned int halfregs = REG_NREGS (operands[1]);
23739 rtx destlo, desthi;
23740
23741 if (src1 == dest && src2 == dest + halfregs)
23742 {
23743 /* No-op move. Can't split to nothing; emit something. */
23744 emit_note (NOTE_INSN_DELETED);
23745 return;
23746 }
23747
23748 /* Preserve register attributes for variable tracking. */
23749 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23750 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23751 GET_MODE_SIZE (halfmode));
23752
23753 /* Special case of reversed high/low parts. Use VSWP. */
23754 if (src2 == dest && src1 == dest + halfregs)
23755 {
23756 rtx x = gen_rtx_SET (destlo, operands[1]);
23757 rtx y = gen_rtx_SET (desthi, operands[2]);
23758 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23759 return;
23760 }
23761
23762 if (!reg_overlap_mentioned_p (operands[2], destlo))
23763 {
23764 /* Try to avoid unnecessary moves if part of the result
23765 is in the right place already. */
23766 if (src1 != dest)
23767 emit_move_insn (destlo, operands[1]);
23768 if (src2 != dest + halfregs)
23769 emit_move_insn (desthi, operands[2]);
23770 }
23771 else
23772 {
23773 if (src2 != dest + halfregs)
23774 emit_move_insn (desthi, operands[2]);
23775 if (src1 != dest)
23776 emit_move_insn (destlo, operands[1]);
23777 }
23778 }
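/* For instance, combining d0 and d2 into q0 (d0/d1) needs only the single
   move d1 <- d2 because the low half is already in place, while combining
   d1 and d0 into q0 hits the reversed case above and uses the VSWP
   pattern. */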
23779 \f
23780 /* Return the number (counting from 0) of
23781 the least significant set bit in MASK. */
23782
23783 inline static int
23784 number_of_first_bit_set (unsigned mask)
23785 {
23786 return ctz_hwi (mask);
23787 }
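/* E.g. number_of_first_bit_set (0x14) is 2, since bit 2 is the lowest bit
   set in 0b10100. */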
23788
23789 /* Like emit_multi_reg_push, but allowing for a different set of
23790 registers to be described as saved. MASK is the set of registers
23791 to be saved; REAL_REGS is the set of registers to be described as
23792 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23793
23794 static rtx_insn *
23795 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23796 {
23797 unsigned long regno;
23798 rtx par[10], tmp, reg;
23799 rtx_insn *insn;
23800 int i, j;
23801
23802 /* Build the parallel of the registers actually being stored. */
23803 for (i = 0; mask; ++i, mask &= mask - 1)
23804 {
23805 regno = ctz_hwi (mask);
23806 reg = gen_rtx_REG (SImode, regno);
23807
23808 if (i == 0)
23809 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23810 else
23811 tmp = gen_rtx_USE (VOIDmode, reg);
23812
23813 par[i] = tmp;
23814 }
23815
23816 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23817 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23818 tmp = gen_frame_mem (BLKmode, tmp);
23819 tmp = gen_rtx_SET (tmp, par[0]);
23820 par[0] = tmp;
23821
23822 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23823 insn = emit_insn (tmp);
23824
23825 /* Always build the stack adjustment note for unwind info. */
23826 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23827 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23828 par[0] = tmp;
23829
23830 /* Build the parallel of the registers recorded as saved for unwind. */
23831 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23832 {
23833 regno = ctz_hwi (real_regs);
23834 reg = gen_rtx_REG (SImode, regno);
23835
23836 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23837 tmp = gen_frame_mem (SImode, tmp);
23838 tmp = gen_rtx_SET (tmp, reg);
23839 RTX_FRAME_RELATED_P (tmp) = 1;
23840 par[j + 1] = tmp;
23841 }
23842
23843 if (j == 0)
23844 tmp = par[0];
23845 else
23846 {
23847 RTX_FRAME_RELATED_P (par[0]) = 1;
23848 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23849 }
23850
23851 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23852
23853 return insn;
23854 }
23855
23856 /* Emit code to pop registers from the stack. F is the
23857 assembly file. MASK is the registers to pop. */
23858 static void
23859 thumb_pop (FILE *f, unsigned long mask)
23860 {
23861 int regno;
23862 int lo_mask = mask & 0xFF;
23863
23864 gcc_assert (mask);
23865
23866 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23867 {
23868 /* Special case. Do not generate a POP PC statement here; do it in
23869 thumb_exit (). */
23870 thumb_exit (f, -1);
23871 return;
23872 }
23873
23874 fprintf (f, "\tpop\t{");
23875
23876 /* Look at the low registers first. */
23877 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23878 {
23879 if (lo_mask & 1)
23880 {
23881 asm_fprintf (f, "%r", regno);
23882
23883 if ((lo_mask & ~1) != 0)
23884 fprintf (f, ", ");
23885 }
23886 }
23887
23888 if (mask & (1 << PC_REGNUM))
23889 {
23890 /* Catch popping the PC. */
23891 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23892 || IS_CMSE_ENTRY (arm_current_func_type ()))
23893 {
23894 /* The PC is never popped directly; instead
23895 it is popped into r3 and then BX is used. */
23896 fprintf (f, "}\n");
23897
23898 thumb_exit (f, -1);
23899
23900 return;
23901 }
23902 else
23903 {
23904 if (mask & 0xFF)
23905 fprintf (f, ", ");
23906
23907 asm_fprintf (f, "%r", PC_REGNUM);
23908 }
23909 }
23910
23911 fprintf (f, "}\n");
23912 }
23913
23914 /* Generate code to return from a thumb function.
23915 If 'reg_containing_return_addr' is -1, then the return address is
23916 actually on the stack, at the stack pointer. */
23917 static void
23918 thumb_exit (FILE *f, int reg_containing_return_addr)
23919 {
23920 unsigned regs_available_for_popping;
23921 unsigned regs_to_pop;
23922 int pops_needed;
23923 unsigned available;
23924 unsigned required;
23925 machine_mode mode;
23926 int size;
23927 int restore_a4 = FALSE;
23928
23929 /* Compute the registers we need to pop. */
23930 regs_to_pop = 0;
23931 pops_needed = 0;
23932
23933 if (reg_containing_return_addr == -1)
23934 {
23935 regs_to_pop |= 1 << LR_REGNUM;
23936 ++pops_needed;
23937 }
23938
23939 if (TARGET_BACKTRACE)
23940 {
23941 /* Restore the (ARM) frame pointer and stack pointer. */
23942 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23943 pops_needed += 2;
23944 }
23945
23946 /* If there is nothing to pop then just emit the BX instruction and
23947 return. */
23948 if (pops_needed == 0)
23949 {
23950 if (crtl->calls_eh_return)
23951 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23952
23953 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23954 {
23955 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23956 reg_containing_return_addr);
23957 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23958 }
23959 else
23960 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23961 return;
23962 }
23963 /* Otherwise if we are not supporting interworking and we have not created
23964 a backtrace structure and the function was not entered in ARM mode then
23965 just pop the return address straight into the PC. */
23966 else if (!TARGET_INTERWORK
23967 && !TARGET_BACKTRACE
23968 && !is_called_in_ARM_mode (current_function_decl)
23969 && !crtl->calls_eh_return
23970 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23971 {
23972 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23973 return;
23974 }
23975
23976 /* Find out how many of the (return) argument registers we can corrupt. */
23977 regs_available_for_popping = 0;
23978
23979 /* If returning via __builtin_eh_return, the bottom three registers
23980 all contain information needed for the return. */
23981 if (crtl->calls_eh_return)
23982 size = 12;
23983 else
23984 {
23985 /* Deduce the registers used from the function's return
23986 value. This is more reliable than examining
23987 df_regs_ever_live_p () because that will be set if the register is
23988 ever used in the function, not just if the register is used
23989 to hold a return value. */
23990
23991 if (crtl->return_rtx != 0)
23992 mode = GET_MODE (crtl->return_rtx);
23993 else
23994 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23995
23996 size = GET_MODE_SIZE (mode);
23997
23998 if (size == 0)
23999 {
24000 /* In a void function we can use any argument register.
24001 In a function that returns a structure on the stack
24002 we can use the second and third argument registers. */
24003 if (mode == VOIDmode)
24004 regs_available_for_popping =
24005 (1 << ARG_REGISTER (1))
24006 | (1 << ARG_REGISTER (2))
24007 | (1 << ARG_REGISTER (3));
24008 else
24009 regs_available_for_popping =
24010 (1 << ARG_REGISTER (2))
24011 | (1 << ARG_REGISTER (3));
24012 }
24013 else if (size <= 4)
24014 regs_available_for_popping =
24015 (1 << ARG_REGISTER (2))
24016 | (1 << ARG_REGISTER (3));
24017 else if (size <= 8)
24018 regs_available_for_popping =
24019 (1 << ARG_REGISTER (3));
24020 }
24021
24022 /* Match registers to be popped with registers into which we pop them. */
24023 for (available = regs_available_for_popping,
24024 required = regs_to_pop;
24025 required != 0 && available != 0;
24026 available &= ~(available & - available),
24027 required &= ~(required & - required))
24028 -- pops_needed;
24029
24030 /* If we have any popping registers left over, remove them. */
24031 if (available > 0)
24032 regs_available_for_popping &= ~available;
24033
24034 /* Otherwise if we need another popping register we can use
24035 the fourth argument register. */
24036 else if (pops_needed)
24037 {
24038 /* If we have not found any free argument registers and
24039 reg a4 contains the return address, we must move it. */
24040 if (regs_available_for_popping == 0
24041 && reg_containing_return_addr == LAST_ARG_REGNUM)
24042 {
24043 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24044 reg_containing_return_addr = LR_REGNUM;
24045 }
24046 else if (size > 12)
24047 {
24048 /* Register a4 is being used to hold part of the return value,
24049 but we have dire need of a free, low register. */
24050 restore_a4 = TRUE;
24051
24052 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24053 }
24054
24055 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24056 {
24057 /* The fourth argument register is available. */
24058 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24059
24060 --pops_needed;
24061 }
24062 }
24063
24064 /* Pop as many registers as we can. */
24065 thumb_pop (f, regs_available_for_popping);
24066
24067 /* Process the registers we popped. */
24068 if (reg_containing_return_addr == -1)
24069 {
24070 /* The return address was popped into the lowest numbered register. */
24071 regs_to_pop &= ~(1 << LR_REGNUM);
24072
24073 reg_containing_return_addr =
24074 number_of_first_bit_set (regs_available_for_popping);
24075
24076 /* Remove this register from the mask of available registers, so that
24077 the return address will not be corrupted by further pops. */
24078 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24079 }
24080
24081 /* If we popped other registers then handle them here. */
24082 if (regs_available_for_popping)
24083 {
24084 int frame_pointer;
24085
24086 /* Work out which register currently contains the frame pointer. */
24087 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24088
24089 /* Move it into the correct place. */
24090 asm_fprintf (f, "\tmov\t%r, %r\n",
24091 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24092
24093 /* (Temporarily) remove it from the mask of popped registers. */
24094 regs_available_for_popping &= ~(1 << frame_pointer);
24095 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24096
24097 if (regs_available_for_popping)
24098 {
24099 int stack_pointer;
24100
24101 /* We popped the stack pointer as well,
24102 find the register that contains it. */
24103 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24104
24105 /* Move it into the stack register. */
24106 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24107
24108 /* At this point we have popped all necessary registers, so
24109 do not worry about restoring regs_available_for_popping
24110 to its correct value:
24111
24112 assert (pops_needed == 0)
24113 assert (regs_available_for_popping == (1 << frame_pointer))
24114 assert (regs_to_pop == (1 << STACK_POINTER)) */
24115 }
24116 else
24117 {
24118 /* Since we have just moved the popped value into the frame
24119 pointer, the popping register is available for reuse, and
24120 we know that we still have the stack pointer left to pop. */
24121 regs_available_for_popping |= (1 << frame_pointer);
24122 }
24123 }
24124
24125 /* If we still have registers left on the stack, but we no longer have
24126 any registers into which we can pop them, then we must move the return
24127 address into the link register and make available the register that
24128 contained it. */
24129 if (regs_available_for_popping == 0 && pops_needed > 0)
24130 {
24131 regs_available_for_popping |= 1 << reg_containing_return_addr;
24132
24133 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24134 reg_containing_return_addr);
24135
24136 reg_containing_return_addr = LR_REGNUM;
24137 }
24138
24139 /* If we have registers left on the stack then pop some more.
24140 We know that at most we will want to pop FP and SP. */
24141 if (pops_needed > 0)
24142 {
24143 int popped_into;
24144 int move_to;
24145
24146 thumb_pop (f, regs_available_for_popping);
24147
24148 /* We have popped either FP or SP.
24149 Move whichever one it is into the correct register. */
24150 popped_into = number_of_first_bit_set (regs_available_for_popping);
24151 move_to = number_of_first_bit_set (regs_to_pop);
24152
24153 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24154 --pops_needed;
24155 }
24156
24157 /* If we still have not popped everything then we must have only
24158 had one register available to us and we are now popping the SP. */
24159 if (pops_needed > 0)
24160 {
24161 int popped_into;
24162
24163 thumb_pop (f, regs_available_for_popping);
24164
24165 popped_into = number_of_first_bit_set (regs_available_for_popping);
24166
24167 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24168 /*
24169 assert (regs_to_pop == (1 << STACK_POINTER))
24170 assert (pops_needed == 1)
24171 */
24172 }
24173
24174 /* If necessary restore the a4 register. */
24175 if (restore_a4)
24176 {
24177 if (reg_containing_return_addr != LR_REGNUM)
24178 {
24179 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24180 reg_containing_return_addr = LR_REGNUM;
24181 }
24182
24183 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24184 }
24185
24186 if (crtl->calls_eh_return)
24187 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24188
24189 /* Return to caller. */
24190 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24191 {
24192 /* This is for the cases where LR is not being used to contain the return
24193 address. It may therefore contain information that we might not want
24194 to leak, hence it must be cleared. The value in R0 will never be a
24195 secret at this point, so it is safe to use it; see the clearing code
24196 in 'cmse_nonsecure_entry_clear_before_return'. */
24197 if (reg_containing_return_addr != LR_REGNUM)
24198 asm_fprintf (f, "\tmov\tlr, r0\n");
24199
24200 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24201 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24202 }
24203 else
24204 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24205 }
24206 \f
24207 /* Scan INSN just before assembler is output for it.
24208 For Thumb-1, we track the status of the condition codes; this
24209 information is used in the cbranchsi4_insn pattern. */
24210 void
24211 thumb1_final_prescan_insn (rtx_insn *insn)
24212 {
24213 if (flag_print_asm_name)
24214 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24215 INSN_ADDRESSES (INSN_UID (insn)));
24216 /* Don't overwrite the previous setter when we get to a cbranch. */
24217 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24218 {
24219 enum attr_conds conds;
24220
24221 if (cfun->machine->thumb1_cc_insn)
24222 {
24223 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24224 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24225 CC_STATUS_INIT;
24226 }
24227 conds = get_attr_conds (insn);
24228 if (conds == CONDS_SET)
24229 {
24230 rtx set = single_set (insn);
24231 cfun->machine->thumb1_cc_insn = insn;
24232 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24233 cfun->machine->thumb1_cc_op1 = const0_rtx;
24234 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24235 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24236 {
24237 rtx src1 = XEXP (SET_SRC (set), 1);
24238 if (src1 == const0_rtx)
24239 cfun->machine->thumb1_cc_mode = CCmode;
24240 }
24241 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24242 {
24243 /* Record the src register operand instead of dest because
24244 cprop_hardreg pass propagates src. */
24245 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24246 }
24247 }
24248 else if (conds != CONDS_NOCOND)
24249 cfun->machine->thumb1_cc_insn = NULL_RTX;
24250 }
24251
24252 /* Check if unexpected far jump is used. */
24253 if (cfun->machine->lr_save_eliminated
24254 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24255 internal_error ("Unexpected thumb1 far jump");
24256 }
24257
24258 int
24259 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24260 {
24261 unsigned HOST_WIDE_INT mask = 0xff;
24262 int i;
24263
24264 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24265 if (val == 0) /* XXX */
24266 return 0;
24267
24268 for (i = 0; i < 25; i++)
24269 if ((val & (mask << i)) == val)
24270 return 1;
24271
24272 return 0;
24273 }
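/* E.g. 0x00ff0000 is accepted (0xff shifted left by 16), while 0x00ff00ff
   is rejected because no single shifted 8-bit mask covers both bytes. */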
24274
24275 /* Returns nonzero if the current function contains,
24276 or might contain a far jump. */
24277 static int
24278 thumb_far_jump_used_p (void)
24279 {
24280 rtx_insn *insn;
24281 bool far_jump = false;
24282 unsigned int func_size = 0;
24283
24284 /* If we have already decided that far jumps may be used,
24285 do not bother checking again, and always return true even if
24286 it turns out that they are not being used. Once we have made
24287 the decision that far jumps are present (and that hence the link
24288 register will be pushed onto the stack) we cannot go back on it. */
24289 if (cfun->machine->far_jump_used)
24290 return 1;
24291
24292 /* If this function is not being called from the prologue/epilogue
24293 generation code then it must be being called from the
24294 INITIAL_ELIMINATION_OFFSET macro. */
24295 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24296 {
24297 /* In this case we know that we are being asked about the elimination
24298 of the arg pointer register. If that register is not being used,
24299 then there are no arguments on the stack, and we do not have to
24300 worry that a far jump might force the prologue to push the link
24301 register, changing the stack offsets. In this case we can just
24302 return false, since the presence of far jumps in the function will
24303 not affect stack offsets.
24304
24305 If the arg pointer is live (or if it was live, but has now been
24306 eliminated and so set to dead) then we do have to test to see if
24307 the function might contain a far jump. This test can lead to some
24308 false positives, since before reload is completed, the length of
24309 branch instructions is not known, so gcc defaults to returning their
24310 longest length, which in turn sets the far jump attribute to true.
24311 
24312 A false positive will not result in bad code being generated, but it
24313 will result in a needless push and pop of the link register. We
24314 hope that this does not occur too often.
24315
24316 If we need doubleword stack alignment this could affect the other
24317 elimination offsets so we can't risk getting it wrong. */
24318 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24319 cfun->machine->arg_pointer_live = 1;
24320 else if (!cfun->machine->arg_pointer_live)
24321 return 0;
24322 }
24323
24324 /* We should not change far_jump_used during or after reload, as there is
24325 no chance to change stack frame layout. */
24326 if (reload_in_progress || reload_completed)
24327 return 0;
24328
24329 /* Check to see if the function contains a branch
24330 insn with the far jump attribute set. */
24331 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24332 {
24333 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24334 {
24335 far_jump = true;
24336 }
24337 func_size += get_attr_length (insn);
24338 }
24339
24340 /* The far_jump attribute will always be true for thumb1 before the
24341 shorten_branch pass, so checking it before shorten_branch is not
24342 very useful.
24343 
24344 The following heuristic tries to estimate more accurately whether a
24345 far jump may finally be used. The heuristic is very conservative, as
24346 there is no chance to roll back the decision not to use a far jump.
24347 
24348 A Thumb-1 long branch offset is -2048 to 2046. The worst case is that
24349 each 2-byte insn is associated with a 4-byte constant pool entry. Using
24350 a function size of 2048/3 as the threshold is conservative enough.
24351 if (far_jump)
24352 {
24353 if ((func_size * 3) >= 2048)
24354 {
24355 /* Record the fact that we have decided that
24356 the function does use far jumps. */
24357 cfun->machine->far_jump_used = 1;
24358 return 1;
24359 }
24360 }
24361
24362 return 0;
24363 }
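/* Rough example of the threshold above: a function whose insn lengths sum
   to 800 bytes gives 800 * 3 = 2400 >= 2048, so far jumps are assumed to be
   needed and the link register will be saved. */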
24364
24365 /* Return nonzero if FUNC must be entered in ARM mode. */
24366 static bool
24367 is_called_in_ARM_mode (tree func)
24368 {
24369 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24370
24371 /* Ignore the problem about functions whose address is taken. */
24372 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24373 return true;
24374
24375 #ifdef ARM_PE
24376 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24377 #else
24378 return false;
24379 #endif
24380 }
24381
24382 /* Given the stack offsets and register mask in OFFSETS, decide how
24383 many additional registers to push instead of subtracting a constant
24384 from SP. For epilogues the principle is the same except we use pop.
24385 FOR_PROLOGUE indicates which we're generating. */
24386 static int
24387 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24388 {
24389 HOST_WIDE_INT amount;
24390 unsigned long live_regs_mask = offsets->saved_regs_mask;
24391 /* Extract a mask of the ones we can give to the Thumb's push/pop
24392 instruction. */
24393 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24394 /* Then count how many other high registers will need to be pushed. */
24395 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24396 int n_free, reg_base, size;
24397
24398 if (!for_prologue && frame_pointer_needed)
24399 amount = offsets->locals_base - offsets->saved_regs;
24400 else
24401 amount = offsets->outgoing_args - offsets->saved_regs;
24402
24403 /* If the stack frame size is 512 exactly, we can save one load
24404 instruction, which should make this a win even when optimizing
24405 for speed. */
24406 if (!optimize_size && amount != 512)
24407 return 0;
24408
24409 /* Can't do this if there are high registers to push. */
24410 if (high_regs_pushed != 0)
24411 return 0;
24412
24413 /* Shouldn't do it in the prologue if no registers would normally
24414 be pushed at all. In the epilogue, also allow it if we'll have
24415 a pop insn for the PC. */
24416 if (l_mask == 0
24417 && (for_prologue
24418 || TARGET_BACKTRACE
24419 || (live_regs_mask & 1 << LR_REGNUM) == 0
24420 || TARGET_INTERWORK
24421 || crtl->args.pretend_args_size != 0))
24422 return 0;
24423
24424 /* Don't do this if thumb_expand_prologue wants to emit instructions
24425 between the push and the stack frame allocation. */
24426 if (for_prologue
24427 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24428 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24429 return 0;
24430
24431 reg_base = 0;
24432 n_free = 0;
24433 if (!for_prologue)
24434 {
24435 size = arm_size_return_regs ();
24436 reg_base = ARM_NUM_INTS (size);
24437 live_regs_mask >>= reg_base;
24438 }
24439
24440 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24441 && (for_prologue || call_used_regs[reg_base + n_free]))
24442 {
24443 live_regs_mask >>= 1;
24444 n_free++;
24445 }
24446
24447 if (n_free == 0)
24448 return 0;
24449 gcc_assert (amount / 4 * 4 == amount);
24450
24451 if (amount >= 512 && (amount - n_free * 4) < 512)
24452 return (amount - 508) / 4;
24453 if (amount <= n_free * 4)
24454 return amount / 4;
24455 return 0;
24456 }
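/* Worked example: with a 512-byte adjustment and one free low register,
   512 - 4 = 508 < 512, so one extra register is pushed and the remaining
   SP adjustment of 508 no longer needs the constant to be loaded into a
   register. */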
24457
24458 /* The bits which aren't usefully expanded as rtl. */
24459 const char *
24460 thumb1_unexpanded_epilogue (void)
24461 {
24462 arm_stack_offsets *offsets;
24463 int regno;
24464 unsigned long live_regs_mask = 0;
24465 int high_regs_pushed = 0;
24466 int extra_pop;
24467 int had_to_push_lr;
24468 int size;
24469
24470 if (cfun->machine->return_used_this_function != 0)
24471 return "";
24472
24473 if (IS_NAKED (arm_current_func_type ()))
24474 return "";
24475
24476 offsets = arm_get_frame_offsets ();
24477 live_regs_mask = offsets->saved_regs_mask;
24478 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24479
24480 /* Deduce the registers used from the function's return value.
24481 This is more reliable than examining df_regs_ever_live_p () because that
24482 will be set if the register is ever used in the function, not just if
24483 the register is used to hold a return value. */
24484 size = arm_size_return_regs ();
24485
24486 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24487 if (extra_pop > 0)
24488 {
24489 unsigned long extra_mask = (1 << extra_pop) - 1;
24490 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24491 }
24492
24493 /* The prologue may have pushed some high registers to use as
24494 work registers. E.g. the testsuite file:
24495 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24496 compiles to produce:
24497 push {r4, r5, r6, r7, lr}
24498 mov r7, r9
24499 mov r6, r8
24500 push {r6, r7}
24501 as part of the prologue. We have to undo that pushing here. */
24502
24503 if (high_regs_pushed)
24504 {
24505 unsigned long mask = live_regs_mask & 0xff;
24506 int next_hi_reg;
24507
24508 /* The available low registers depend on the size of the value we are
24509 returning. */
24510 if (size <= 12)
24511 mask |= 1 << 3;
24512 if (size <= 8)
24513 mask |= 1 << 2;
24514
24515 if (mask == 0)
24516 /* Oh dear! We have no low registers into which we can pop
24517 high registers! */
24518 internal_error
24519 ("no low registers available for popping high registers");
24520
24521 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24522 if (live_regs_mask & (1 << next_hi_reg))
24523 break;
24524
24525 while (high_regs_pushed)
24526 {
24527 /* Find lo register(s) into which the high register(s) can
24528 be popped. */
24529 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24530 {
24531 if (mask & (1 << regno))
24532 high_regs_pushed--;
24533 if (high_regs_pushed == 0)
24534 break;
24535 }
24536
24537 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24538
24539 /* Pop the values into the low register(s). */
24540 thumb_pop (asm_out_file, mask);
24541
24542 /* Move the value(s) into the high registers. */
24543 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24544 {
24545 if (mask & (1 << regno))
24546 {
24547 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24548 regno);
24549
24550 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24551 if (live_regs_mask & (1 << next_hi_reg))
24552 break;
24553 }
24554 }
24555 }
24556 live_regs_mask &= ~0x0f00;
24557 }
24558
24559 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24560 live_regs_mask &= 0xff;
24561
24562 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24563 {
24564 /* Pop the return address into the PC. */
24565 if (had_to_push_lr)
24566 live_regs_mask |= 1 << PC_REGNUM;
24567
24568 /* Either no argument registers were pushed or a backtrace
24569 structure was created which includes an adjusted stack
24570 pointer, so just pop everything. */
24571 if (live_regs_mask)
24572 thumb_pop (asm_out_file, live_regs_mask);
24573
24574 /* We have either just popped the return address into the
24575 PC or it was kept in LR for the entire function.
24576 Note that thumb_pop has already called thumb_exit if the
24577 PC was in the list. */
24578 if (!had_to_push_lr)
24579 thumb_exit (asm_out_file, LR_REGNUM);
24580 }
24581 else
24582 {
24583 /* Pop everything but the return address. */
24584 if (live_regs_mask)
24585 thumb_pop (asm_out_file, live_regs_mask);
24586
24587 if (had_to_push_lr)
24588 {
24589 if (size > 12)
24590 {
24591 /* We have no free low regs, so save one. */
24592 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24593 LAST_ARG_REGNUM);
24594 }
24595
24596 /* Get the return address into a temporary register. */
24597 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24598
24599 if (size > 12)
24600 {
24601 /* Move the return address to lr. */
24602 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24603 LAST_ARG_REGNUM);
24604 /* Restore the low register. */
24605 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24606 IP_REGNUM);
24607 regno = LR_REGNUM;
24608 }
24609 else
24610 regno = LAST_ARG_REGNUM;
24611 }
24612 else
24613 regno = LR_REGNUM;
24614
24615 /* Remove the argument registers that were pushed onto the stack. */
24616 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24617 SP_REGNUM, SP_REGNUM,
24618 crtl->args.pretend_args_size);
24619
24620 thumb_exit (asm_out_file, regno);
24621 }
24622
24623 return "";
24624 }
24625
24626 /* Functions to save and restore machine-specific function data. */
24627 static struct machine_function *
24628 arm_init_machine_status (void)
24629 {
24630 struct machine_function *machine;
24631 machine = ggc_cleared_alloc<machine_function> ();
24632
24633 #if ARM_FT_UNKNOWN != 0
24634 machine->func_type = ARM_FT_UNKNOWN;
24635 #endif
24636 return machine;
24637 }
24638
24639 /* Return an RTX indicating where the return address to the
24640 calling function can be found. */
24641 rtx
24642 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24643 {
24644 if (count != 0)
24645 return NULL_RTX;
24646
24647 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24648 }
24649
24650 /* Do anything needed before RTL is emitted for each function. */
24651 void
24652 arm_init_expanders (void)
24653 {
24654 /* Arrange to initialize and mark the machine per-function status. */
24655 init_machine_status = arm_init_machine_status;
24656
24657 /* This is to stop the combine pass optimizing away the alignment
24658 adjustment of va_arg. */
24659 /* ??? It is claimed that this should not be necessary. */
24660 if (cfun)
24661 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24662 }
24663
24664 /* Return true if FUNC would be compiled in a different mode (ARM vs. Thumb) from the current function. */
24665
24666 bool
24667 arm_change_mode_p (tree func)
24668 {
24669 if (TREE_CODE (func) != FUNCTION_DECL)
24670 return false;
24671
24672 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24673
24674 if (!callee_tree)
24675 callee_tree = target_option_default_node;
24676
24677 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24678 int flags = callee_opts->x_target_flags;
24679
24680 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24681 }
24682
24683 /* Like arm_compute_initial_elimination_offset. Simpler because there
24684 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24685 to point at the base of the local variables after static stack
24686 space for a function has been allocated. */
24687
24688 HOST_WIDE_INT
24689 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24690 {
24691 arm_stack_offsets *offsets;
24692
24693 offsets = arm_get_frame_offsets ();
24694
24695 switch (from)
24696 {
24697 case ARG_POINTER_REGNUM:
24698 switch (to)
24699 {
24700 case STACK_POINTER_REGNUM:
24701 return offsets->outgoing_args - offsets->saved_args;
24702
24703 case FRAME_POINTER_REGNUM:
24704 return offsets->soft_frame - offsets->saved_args;
24705
24706 case ARM_HARD_FRAME_POINTER_REGNUM:
24707 return offsets->saved_regs - offsets->saved_args;
24708
24709 case THUMB_HARD_FRAME_POINTER_REGNUM:
24710 return offsets->locals_base - offsets->saved_args;
24711
24712 default:
24713 gcc_unreachable ();
24714 }
24715 break;
24716
24717 case FRAME_POINTER_REGNUM:
24718 switch (to)
24719 {
24720 case STACK_POINTER_REGNUM:
24721 return offsets->outgoing_args - offsets->soft_frame;
24722
24723 case ARM_HARD_FRAME_POINTER_REGNUM:
24724 return offsets->saved_regs - offsets->soft_frame;
24725
24726 case THUMB_HARD_FRAME_POINTER_REGNUM:
24727 return offsets->locals_base - offsets->soft_frame;
24728
24729 default:
24730 gcc_unreachable ();
24731 }
24732 break;
24733
24734 default:
24735 gcc_unreachable ();
24736 }
24737 }
24738
24739 /* Generate the function's prologue. */
24740
24741 void
24742 thumb1_expand_prologue (void)
24743 {
24744 rtx_insn *insn;
24745
24746 HOST_WIDE_INT amount;
24747 HOST_WIDE_INT size;
24748 arm_stack_offsets *offsets;
24749 unsigned long func_type;
24750 int regno;
24751 unsigned long live_regs_mask;
24752 unsigned long l_mask;
24753 unsigned high_regs_pushed = 0;
24754 bool lr_needs_saving;
24755
24756 func_type = arm_current_func_type ();
24757
24758 /* Naked functions don't have prologues. */
24759 if (IS_NAKED (func_type))
24760 {
24761 if (flag_stack_usage_info)
24762 current_function_static_stack_size = 0;
24763 return;
24764 }
24765
24766 if (IS_INTERRUPT (func_type))
24767 {
24768 error ("interrupt Service Routines cannot be coded in Thumb mode");
24769 return;
24770 }
24771
24772 if (is_called_in_ARM_mode (current_function_decl))
24773 emit_insn (gen_prologue_thumb1_interwork ());
24774
24775 offsets = arm_get_frame_offsets ();
24776 live_regs_mask = offsets->saved_regs_mask;
24777 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24778
24779 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24780 l_mask = live_regs_mask & 0x40ff;
24781 /* Then count how many other high registers will need to be pushed. */
24782 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24783
24784 if (crtl->args.pretend_args_size)
24785 {
24786 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24787
24788 if (cfun->machine->uses_anonymous_args)
24789 {
24790 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24791 unsigned long mask;
24792
24793 mask = 1ul << (LAST_ARG_REGNUM + 1);
24794 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24795
24796 insn = thumb1_emit_multi_reg_push (mask, 0);
24797 }
24798 else
24799 {
24800 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24801 stack_pointer_rtx, x));
24802 }
24803 RTX_FRAME_RELATED_P (insn) = 1;
24804 }
24805
24806 if (TARGET_BACKTRACE)
24807 {
24808 HOST_WIDE_INT offset = 0;
24809 unsigned work_register;
24810 rtx work_reg, x, arm_hfp_rtx;
24811
24812 /* We have been asked to create a stack backtrace structure.
24813 The code looks like this:
24814
24815 0 .align 2
24816 0 func:
24817 0 sub SP, #16 Reserve space for 4 registers.
24818 2 push {R7} Push low registers.
24819 4 add R7, SP, #20 Get the stack pointer before the push.
24820 6 str R7, [SP, #8] Store the stack pointer
24821 (before reserving the space).
24822 8 mov R7, PC Get hold of the start of this code + 12.
24823 10 str R7, [SP, #16] Store it.
24824 12 mov R7, FP Get hold of the current frame pointer.
24825 14 str R7, [SP, #4] Store it.
24826 16 mov R7, LR Get hold of the current return address.
24827 18 str R7, [SP, #12] Store it.
24828 20 add R7, SP, #16 Point at the start of the
24829 backtrace structure.
24830 22 mov FP, R7 Put this value into the frame pointer. */
24831
24832 work_register = thumb_find_work_register (live_regs_mask);
24833 work_reg = gen_rtx_REG (SImode, work_register);
24834 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24835
24836 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24837 stack_pointer_rtx, GEN_INT (-16)));
24838 RTX_FRAME_RELATED_P (insn) = 1;
24839
24840 if (l_mask)
24841 {
24842 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24843 RTX_FRAME_RELATED_P (insn) = 1;
24844 lr_needs_saving = false;
24845
24846 offset = bit_count (l_mask) * UNITS_PER_WORD;
24847 }
24848
24849 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24850 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24851
24852 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24853 x = gen_frame_mem (SImode, x);
24854 emit_move_insn (x, work_reg);
24855
24856 /* Make sure that the instruction fetching the PC is in the right place
24857 to calculate "start of backtrace creation code + 12". */
24858 /* ??? The stores using the common WORK_REG ought to be enough to
24859 prevent the scheduler from doing anything weird. Failing that
24860 we could always move all of the following into an UNSPEC_VOLATILE. */
24861 if (l_mask)
24862 {
24863 x = gen_rtx_REG (SImode, PC_REGNUM);
24864 emit_move_insn (work_reg, x);
24865
24866 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24867 x = gen_frame_mem (SImode, x);
24868 emit_move_insn (x, work_reg);
24869
24870 emit_move_insn (work_reg, arm_hfp_rtx);
24871
24872 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24873 x = gen_frame_mem (SImode, x);
24874 emit_move_insn (x, work_reg);
24875 }
24876 else
24877 {
24878 emit_move_insn (work_reg, arm_hfp_rtx);
24879
24880 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24881 x = gen_frame_mem (SImode, x);
24882 emit_move_insn (x, work_reg);
24883
24884 x = gen_rtx_REG (SImode, PC_REGNUM);
24885 emit_move_insn (work_reg, x);
24886
24887 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24888 x = gen_frame_mem (SImode, x);
24889 emit_move_insn (x, work_reg);
24890 }
24891
24892 x = gen_rtx_REG (SImode, LR_REGNUM);
24893 emit_move_insn (work_reg, x);
24894
24895 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24896 x = gen_frame_mem (SImode, x);
24897 emit_move_insn (x, work_reg);
24898
24899 x = GEN_INT (offset + 12);
24900 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24901
24902 emit_move_insn (arm_hfp_rtx, work_reg);
24903 }
24904 /* Optimization: If we are not pushing any low registers but we are going
24905 to push some high registers then delay our first push. This will just
24906 be a push of LR and we can combine it with the push of the first high
24907 register. */
24908 else if ((l_mask & 0xff) != 0
24909 || (high_regs_pushed == 0 && lr_needs_saving))
24910 {
24911 unsigned long mask = l_mask;
24912 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24913 insn = thumb1_emit_multi_reg_push (mask, mask);
24914 RTX_FRAME_RELATED_P (insn) = 1;
24915 lr_needs_saving = false;
24916 }
24917
24918 if (high_regs_pushed)
24919 {
24920 unsigned pushable_regs;
24921 unsigned next_hi_reg;
24922 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24923 : crtl->args.info.nregs;
24924 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24925
24926 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24927 if (live_regs_mask & (1 << next_hi_reg))
24928 break;
24929
24930 /* Here we need to mask out registers used for passing arguments
24931 even if they could be pushed. This is to avoid using them to stash the
24932 high registers, since such stashing would clobber the argument values. */
24933 pushable_regs = l_mask & (~arg_regs_mask);
24934 if (lr_needs_saving)
24935 pushable_regs &= ~(1 << LR_REGNUM);
24936
24937 if (pushable_regs == 0)
24938 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24939
24940 while (high_regs_pushed > 0)
24941 {
24942 unsigned long real_regs_mask = 0;
24943 unsigned long push_mask = 0;
24944
24945 for (regno = LR_REGNUM; regno >= 0; regno --)
24946 {
24947 if (pushable_regs & (1 << regno))
24948 {
24949 emit_move_insn (gen_rtx_REG (SImode, regno),
24950 gen_rtx_REG (SImode, next_hi_reg));
24951
24952 high_regs_pushed --;
24953 real_regs_mask |= (1 << next_hi_reg);
24954 push_mask |= (1 << regno);
24955
24956 if (high_regs_pushed)
24957 {
24958 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24959 next_hi_reg --)
24960 if (live_regs_mask & (1 << next_hi_reg))
24961 break;
24962 }
24963 else
24964 break;
24965 }
24966 }
24967
24968 /* If we had to find a work register and we have not yet
24969 saved the LR then add it to the list of regs to push. */
24970 if (lr_needs_saving)
24971 {
24972 push_mask |= 1 << LR_REGNUM;
24973 real_regs_mask |= 1 << LR_REGNUM;
24974 lr_needs_saving = false;
24975 }
24976
24977 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24978 RTX_FRAME_RELATED_P (insn) = 1;
24979 }
24980 }
24981
24982 /* Load the pic register before setting the frame pointer,
24983 so we can use r7 as a temporary work register. */
24984 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24985 arm_load_pic_register (live_regs_mask);
24986
24987 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24988 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24989 stack_pointer_rtx);
24990
24991 size = offsets->outgoing_args - offsets->saved_args;
24992 if (flag_stack_usage_info)
24993 current_function_static_stack_size = size;
24994
24995 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24996 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
24997 || flag_stack_clash_protection)
24998 && size)
24999 sorry ("-fstack-check=specific for Thumb-1");
25000
25001 amount = offsets->outgoing_args - offsets->saved_regs;
25002 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25003 if (amount)
25004 {
25005 if (amount < 512)
25006 {
25007 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25008 GEN_INT (- amount)));
25009 RTX_FRAME_RELATED_P (insn) = 1;
25010 }
25011 else
25012 {
25013 rtx reg, dwarf;
25014
25015 /* The stack decrement is too big for an immediate value in a single
25016 insn. In theory we could issue multiple subtracts, but after
25017 three of them it becomes more space efficient to place the full
25018 value in the constant pool and load into a register. (Also the
25019 ARM debugger really likes to see only one stack decrement per
25020 function). So instead we look for a scratch register into which
25021 we can load the decrement, and then we subtract this from the
25022 stack pointer. Unfortunately on the thumb the only available
25023 scratch registers are the argument registers, and we cannot use
25024 these as they may hold arguments to the function. Instead we
25025 attempt to locate a call preserved register which is used by this
25026 function. If we can find one, then we know that it will have
25027 been pushed at the start of the prologue and so we can corrupt
25028 it now. */
25029 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25030 if (live_regs_mask & (1 << regno))
25031 break;
25032
25033 gcc_assert (regno <= LAST_LO_REGNUM);
25034
25035 reg = gen_rtx_REG (SImode, regno);
25036
25037 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25038
25039 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25040 stack_pointer_rtx, reg));
25041
25042 dwarf = gen_rtx_SET (stack_pointer_rtx,
25043 plus_constant (Pmode, stack_pointer_rtx,
25044 -amount));
25045 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25046 RTX_FRAME_RELATED_P (insn) = 1;
25047 }
25048 }
25049
25050 if (frame_pointer_needed)
25051 thumb_set_frame_pointer (offsets);
25052
25053 /* If we are profiling, make sure no instructions are scheduled before
25054 the call to mcount. Similarly if the user has requested no
25055 scheduling in the prologue. Similarly if we want non-call exceptions
25056 using the EABI unwinder, to prevent faulting instructions from being
25057 swapped with a stack adjustment. */
25058 if (crtl->profile || !TARGET_SCHED_PROLOG
25059 || (arm_except_unwind_info (&global_options) == UI_TARGET
25060 && cfun->can_throw_non_call_exceptions))
25061 emit_insn (gen_blockage ());
25062
25063 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25064 if (live_regs_mask & 0xff)
25065 cfun->machine->lr_save_eliminated = 0;
25066 }
25067
25068 /* Clear caller-saved registers that are not used to pass return values, and
25069 clear leaked condition flags, before exiting a cmse_nonsecure_entry function. */
25070
25071 void
25072 cmse_nonsecure_entry_clear_before_return (void)
25073 {
25074 uint64_t to_clear_mask[2];
25075 uint32_t padding_bits_to_clear = 0;
25076 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25077 int regno, maxregno = IP_REGNUM;
25078 tree result_type;
25079 rtx result_rtl;
25080
25081 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25082 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25083
25084 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25085 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25086 to make sure the instructions used to clear them are present. */
25087 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25088 {
25089 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25090 maxregno = LAST_VFP_REGNUM;
25091
25092 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25093 to_clear_mask[0] |= float_mask;
25094
25095 float_mask = (1ULL << (maxregno - 63)) - 1;
25096 to_clear_mask[1] = float_mask;
25097
25098 /* Make sure we don't clear the two scratch registers used to clear the
25099 relevant FPSCR bits in output_return_instruction. */
25100 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25101 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25102 emit_use (gen_rtx_REG (SImode, 4));
25103 to_clear_mask[0] &= ~(1ULL << 4);
25104 }
25105
25106 /* If the user has defined registers to be caller saved, these are no longer
25107 restored by the function before returning and must thus be cleared for
25108 security purposes. */
25109 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25110 {
25111 /* We do not touch registers that can be used to pass arguments as per
25112 the AAPCS, since these should never be made callee-saved by user
25113 options. */
25114 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25115 continue;
25116 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25117 continue;
25118 if (call_used_regs[regno])
25119 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25120 }
25121
25122 /* Make sure we do not clear the registers used to return the result in. */
25123 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25124 if (!VOID_TYPE_P (result_type))
25125 {
25126 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25127
25128 /* No need to check that we return in registers, because we don't
25129 support returning on stack yet. */
25130 to_clear_mask[0]
25131 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25132 padding_bits_to_clear_ptr);
25133 }
25134
25135 if (padding_bits_to_clear != 0)
25136 {
25137 rtx reg_rtx;
25138 /* Padding bits to clear is non-zero, so we know we are dealing with
25139 returning a composite type, which only uses r0. Let's make sure that
25140 r1-r3 are cleared too; we will use r1 as a scratch register. */
25141 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25142
25143 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25144
25145 /* Fill the lower half of the negated padding_bits_to_clear. */
25146 emit_move_insn (reg_rtx,
25147 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25148
25149 /* Also fill the top half of the negated padding_bits_to_clear. */
25150 if (((~padding_bits_to_clear) >> 16) > 0)
25151 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25152 GEN_INT (16),
25153 GEN_INT (16)),
25154 GEN_INT ((~padding_bits_to_clear) >> 16)));
25155
25156 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25157 gen_rtx_REG (SImode, R0_REGNUM),
25158 reg_rtx));
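/* The net effect is r0 &= ~padding_bits_to_clear: the padding bits inside
the returned composite are zeroed while the real field bits in r0 are
preserved. */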
25159 }
25160
25161 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25162 {
25163 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25164 continue;
25165
25166 if (IS_VFP_REGNUM (regno))
25167 {
25168 /* If regno is an even vfp register and its successor is also to
25169 be cleared, use vmov. */
25170 if (TARGET_VFP_DOUBLE
25171 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25172 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25173 {
25174 emit_move_insn (gen_rtx_REG (DFmode, regno),
25175 CONST1_RTX (DFmode));
25176 emit_use (gen_rtx_REG (DFmode, regno));
25177 regno++;
25178 }
25179 else
25180 {
25181 emit_move_insn (gen_rtx_REG (SFmode, regno),
25182 CONST1_RTX (SFmode));
25183 emit_use (gen_rtx_REG (SFmode, regno));
25184 }
25185 }
25186 else
25187 {
25188 if (TARGET_THUMB1)
25189 {
25190 if (regno == R0_REGNUM)
25191 emit_move_insn (gen_rtx_REG (SImode, regno),
25192 const0_rtx);
25193 else
25194 /* R0 has either been cleared before, see code above, or it
25195 holds a return value, either way it is not secret
25196 information. */
25197 emit_move_insn (gen_rtx_REG (SImode, regno),
25198 gen_rtx_REG (SImode, R0_REGNUM));
25199 emit_use (gen_rtx_REG (SImode, regno));
25200 }
25201 else
25202 {
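/* Outside Thumb-1, overwrite the register with LR, which at this point
holds only the non-secret return address, so nothing secret is left
behind. */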
25203 emit_move_insn (gen_rtx_REG (SImode, regno),
25204 gen_rtx_REG (SImode, LR_REGNUM));
25205 emit_use (gen_rtx_REG (SImode, regno));
25206 }
25207 }
25208 }
25209 }
25210
25211 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25212 single POP instruction can be generated. LR should be replaced by PC. All
25213 the checks required are already done by USE_RETURN_INSN (). Hence,
25214 all we really need to check here is whether a single register or
25215 multiple registers are to be popped. */
25216 void
25217 thumb2_expand_return (bool simple_return)
25218 {
25219 int i, num_regs;
25220 unsigned long saved_regs_mask;
25221 arm_stack_offsets *offsets;
25222
25223 offsets = arm_get_frame_offsets ();
25224 saved_regs_mask = offsets->saved_regs_mask;
25225
25226 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25227 if (saved_regs_mask & (1 << i))
25228 num_regs++;
25229
25230 if (!simple_return && saved_regs_mask)
25231 {
25232 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25233 functions or adapt code to handle according to ACLE. This path should
25234 not be reachable for cmse_nonsecure_entry functions though we prefer
25235 to assert it for now to ensure that future code changes do not silently
25236 change this behavior. */
25237 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25238 if (num_regs == 1)
25239 {
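/* The single saved register (LR) is popped straight into the PC: the
PARALLEL pairs the return with a post-incremented load from the stack so
that it matches *pop_multiple_with_stack_update_and_return. */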
25240 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25241 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25242 rtx addr = gen_rtx_MEM (SImode,
25243 gen_rtx_POST_INC (SImode,
25244 stack_pointer_rtx));
25245 set_mem_alias_set (addr, get_frame_alias_set ());
25246 XVECEXP (par, 0, 0) = ret_rtx;
25247 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25248 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25249 emit_jump_insn (par);
25250 }
25251 else
25252 {
25253 saved_regs_mask &= ~ (1 << LR_REGNUM);
25254 saved_regs_mask |= (1 << PC_REGNUM);
25255 arm_emit_multi_reg_pop (saved_regs_mask);
25256 }
25257 }
25258 else
25259 {
25260 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25261 cmse_nonsecure_entry_clear_before_return ();
25262 emit_jump_insn (simple_return_rtx);
25263 }
25264 }
25265
25266 void
25267 thumb1_expand_epilogue (void)
25268 {
25269 HOST_WIDE_INT amount;
25270 arm_stack_offsets *offsets;
25271 int regno;
25272
25273 /* Naked functions don't have prologues. */
25274 if (IS_NAKED (arm_current_func_type ()))
25275 return;
25276
25277 offsets = arm_get_frame_offsets ();
25278 amount = offsets->outgoing_args - offsets->saved_regs;
25279
25280 if (frame_pointer_needed)
25281 {
25282 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25283 amount = offsets->locals_base - offsets->saved_regs;
25284 }
25285 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25286
25287 gcc_assert (amount >= 0);
25288 if (amount)
25289 {
25290 emit_insn (gen_blockage ());
25291
25292 if (amount < 512)
25293 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25294 GEN_INT (amount)));
25295 else
25296 {
25297 /* r3 is always free in the epilogue. */
25298 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25299
25300 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25301 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25302 }
25303 }
25304
25305 /* Emit a USE (stack_pointer_rtx), so that
25306 the stack adjustment will not be deleted. */
25307 emit_insn (gen_force_register_use (stack_pointer_rtx));
25308
25309 if (crtl->profile || !TARGET_SCHED_PROLOG)
25310 emit_insn (gen_blockage ());
25311
25312 /* Emit a clobber for each insn that will be restored in the epilogue,
25313 so that flow2 will get register lifetimes correct. */
25314 for (regno = 0; regno < 13; regno++)
25315 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25316 emit_clobber (gen_rtx_REG (SImode, regno));
25317
25318 if (! df_regs_ever_live_p (LR_REGNUM))
25319 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25320
25321 /* Clear all caller-saved regs that are not used to return. */
25322 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25323 cmse_nonsecure_entry_clear_before_return ();
25324 }
25325
25326 /* Epilogue code for APCS frame. */
25327 static void
25328 arm_expand_epilogue_apcs_frame (bool really_return)
25329 {
25330 unsigned long func_type;
25331 unsigned long saved_regs_mask;
25332 int num_regs = 0;
25333 int i;
25334 int floats_from_frame = 0;
25335 arm_stack_offsets *offsets;
25336
25337 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25338 func_type = arm_current_func_type ();
25339
25340 /* Get frame offsets for ARM. */
25341 offsets = arm_get_frame_offsets ();
25342 saved_regs_mask = offsets->saved_regs_mask;
25343
25344 /* Find the offset of the floating-point save area in the frame. */
25345 floats_from_frame
25346 = (offsets->saved_args
25347 + arm_compute_static_chain_stack_bytes ()
25348 - offsets->frame);
25349
25350 /* Compute how many core registers are saved and how far away the floats are. */
25351 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25352 if (saved_regs_mask & (1 << i))
25353 {
25354 num_regs++;
25355 floats_from_frame += 4;
25356 }
25357
25358 if (TARGET_HARD_FLOAT)
25359 {
25360 int start_reg;
25361 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25362
25363 /* The offset is from IP_REGNUM. */
25364 int saved_size = arm_get_vfp_saved_size ();
25365 if (saved_size > 0)
25366 {
25367 rtx_insn *insn;
25368 floats_from_frame += saved_size;
25369 insn = emit_insn (gen_addsi3 (ip_rtx,
25370 hard_frame_pointer_rtx,
25371 GEN_INT (-floats_from_frame)));
25372 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25373 ip_rtx, hard_frame_pointer_rtx);
25374 }
25375
25376 /* Generate VFP register multi-pop. */
25377 start_reg = FIRST_VFP_REGNUM;
25378
25379 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25380 /* Look for a case where a reg does not need restoring. */
25381 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25382 && (!df_regs_ever_live_p (i + 1)
25383 || call_used_regs[i + 1]))
25384 {
25385 if (start_reg != i)
25386 arm_emit_vfp_multi_reg_pop (start_reg,
25387 (i - start_reg) / 2,
25388 gen_rtx_REG (SImode,
25389 IP_REGNUM));
25390 start_reg = i + 2;
25391 }
25392
25393 /* Restore the remaining regs that we have discovered (or possibly
25394 even all of them, if the conditional in the for loop never
25395 fired). */
25396 if (start_reg != i)
25397 arm_emit_vfp_multi_reg_pop (start_reg,
25398 (i - start_reg) / 2,
25399 gen_rtx_REG (SImode, IP_REGNUM));
25400 }
25401
25402 if (TARGET_IWMMXT)
25403 {
25404 /* The frame pointer is guaranteed to be non-double-word aligned, as
25405 it is set to double-word-aligned old_stack_pointer - 4. */
25406 rtx_insn *insn;
25407 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25408
25409 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25410 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25411 {
25412 rtx addr = gen_frame_mem (V2SImode,
25413 plus_constant (Pmode, hard_frame_pointer_rtx,
25414 - lrm_count * 4));
25415 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25416 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25417 gen_rtx_REG (V2SImode, i),
25418 NULL_RTX);
25419 lrm_count += 2;
25420 }
25421 }
25422
25423 /* saved_regs_mask should contain IP, which holds the old stack pointer
25424 saved when the frame was created. Since SP and IP are adjacent registers,
25425 we can restore that value directly into SP. */
25426 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25427 saved_regs_mask &= ~(1 << IP_REGNUM);
25428 saved_regs_mask |= (1 << SP_REGNUM);
25429
25430 /* There are two registers left in saved_regs_mask - LR and PC. We
25431 only need to restore LR (the return address), but to
25432 save time we can load it directly into PC, unless we need a
25433 special function exit sequence, or we are not really returning. */
25434 if (really_return
25435 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25436 && !crtl->calls_eh_return)
25437 /* Delete LR from the register mask, so that LR on
25438 the stack is loaded into the PC in the register mask. */
25439 saved_regs_mask &= ~(1 << LR_REGNUM);
25440 else
25441 saved_regs_mask &= ~(1 << PC_REGNUM);
25442
25443 num_regs = bit_count (saved_regs_mask);
25444 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25445 {
25446 rtx_insn *insn;
25447 emit_insn (gen_blockage ());
25448 /* Unwind the stack to just below the saved registers. */
25449 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25450 hard_frame_pointer_rtx,
25451 GEN_INT (- 4 * num_regs)));
25452
25453 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25454 stack_pointer_rtx, hard_frame_pointer_rtx);
25455 }
25456
25457 arm_emit_multi_reg_pop (saved_regs_mask);
25458
25459 if (IS_INTERRUPT (func_type))
25460 {
25461 /* Interrupt handlers will have pushed the
25462 IP onto the stack, so restore it now. */
25463 rtx_insn *insn;
25464 rtx addr = gen_rtx_MEM (SImode,
25465 gen_rtx_POST_INC (SImode,
25466 stack_pointer_rtx));
25467 set_mem_alias_set (addr, get_frame_alias_set ());
25468 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25469 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25470 gen_rtx_REG (SImode, IP_REGNUM),
25471 NULL_RTX);
25472 }
25473
25474 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25475 return;
25476
25477 if (crtl->calls_eh_return)
25478 emit_insn (gen_addsi3 (stack_pointer_rtx,
25479 stack_pointer_rtx,
25480 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25481
25482 if (IS_STACKALIGN (func_type))
25483 /* Restore the original stack pointer. Before prologue, the stack was
25484 realigned and the original stack pointer saved in r0. For details,
25485 see comment in arm_expand_prologue. */
25486 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25487
25488 emit_jump_insn (simple_return_rtx);
25489 }
25490
25491 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25492 function is not a sibcall. */
25493 void
25494 arm_expand_epilogue (bool really_return)
25495 {
25496 unsigned long func_type;
25497 unsigned long saved_regs_mask;
25498 int num_regs = 0;
25499 int i;
25500 int amount;
25501 arm_stack_offsets *offsets;
25502
25503 func_type = arm_current_func_type ();
25504
25505 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25506 and let output_return_instruction take care of any instruction emission. */
25507 if (IS_NAKED (func_type)
25508 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25509 {
25510 if (really_return)
25511 emit_jump_insn (simple_return_rtx);
25512 return;
25513 }
25514
25515 /* If we are throwing an exception, then we really must be doing a
25516 return, so we can't tail-call. */
25517 gcc_assert (!crtl->calls_eh_return || really_return);
25518
25519 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25520 {
25521 arm_expand_epilogue_apcs_frame (really_return);
25522 return;
25523 }
25524
25525 /* Get frame offsets for ARM. */
25526 offsets = arm_get_frame_offsets ();
25527 saved_regs_mask = offsets->saved_regs_mask;
25528 num_regs = bit_count (saved_regs_mask);
25529
25530 if (frame_pointer_needed)
25531 {
25532 rtx_insn *insn;
25533 /* Restore stack pointer if necessary. */
25534 if (TARGET_ARM)
25535 {
25536 /* In ARM mode, frame pointer points to first saved register.
25537 Restore stack pointer to last saved register. */
25538 amount = offsets->frame - offsets->saved_regs;
25539
25540 /* Force out any pending memory operations that reference stacked data
25541 before stack de-allocation occurs. */
25542 emit_insn (gen_blockage ());
25543 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25544 hard_frame_pointer_rtx,
25545 GEN_INT (amount)));
25546 arm_add_cfa_adjust_cfa_note (insn, amount,
25547 stack_pointer_rtx,
25548 hard_frame_pointer_rtx);
25549
25550 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25551 deleted. */
25552 emit_insn (gen_force_register_use (stack_pointer_rtx));
25553 }
25554 else
25555 {
25556 /* In Thumb-2 mode, the frame pointer points to the last saved
25557 register. */
25558 amount = offsets->locals_base - offsets->saved_regs;
25559 if (amount)
25560 {
25561 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25562 hard_frame_pointer_rtx,
25563 GEN_INT (amount)));
25564 arm_add_cfa_adjust_cfa_note (insn, amount,
25565 hard_frame_pointer_rtx,
25566 hard_frame_pointer_rtx);
25567 }
25568
25569 /* Force out any pending memory operations that reference stacked data
25570 before stack de-allocation occurs. */
25571 emit_insn (gen_blockage ());
25572 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25573 hard_frame_pointer_rtx));
25574 arm_add_cfa_adjust_cfa_note (insn, 0,
25575 stack_pointer_rtx,
25576 hard_frame_pointer_rtx);
25577 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25578 deleted. */
25579 emit_insn (gen_force_register_use (stack_pointer_rtx));
25580 }
25581 }
25582 else
25583 {
25584 /* Pop off outgoing args and local frame to adjust stack pointer to
25585 last saved register. */
25586 amount = offsets->outgoing_args - offsets->saved_regs;
25587 if (amount)
25588 {
25589 rtx_insn *tmp;
25590 /* Force out any pending memory operations that reference stacked data
25591 before stack de-allocation occurs. */
25592 emit_insn (gen_blockage ());
25593 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25594 stack_pointer_rtx,
25595 GEN_INT (amount)));
25596 arm_add_cfa_adjust_cfa_note (tmp, amount,
25597 stack_pointer_rtx, stack_pointer_rtx);
25598 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25599 not deleted. */
25600 emit_insn (gen_force_register_use (stack_pointer_rtx));
25601 }
25602 }
25603
25604 if (TARGET_HARD_FLOAT)
25605 {
25606 /* Generate VFP register multi-pop. */
25607 int end_reg = LAST_VFP_REGNUM + 1;
25608
25609 /* Scan the registers in reverse order. We need to match
25610 any groupings made in the prologue and generate matching
25611 vldm operations. The need to match groups is because,
25612 unlike pop, vldm can only do consecutive regs. */
25613 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25614 /* Look for a case where a reg does not need restoring. */
25615 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25616 && (!df_regs_ever_live_p (i + 1)
25617 || call_used_regs[i + 1]))
25618 {
25619 /* Restore the regs discovered so far (from reg+2 to
25620 end_reg). */
25621 if (end_reg > i + 2)
25622 arm_emit_vfp_multi_reg_pop (i + 2,
25623 (end_reg - (i + 2)) / 2,
25624 stack_pointer_rtx);
25625 end_reg = i;
25626 }
25627
25628 /* Restore the remaining regs that we have discovered (or possibly
25629 even all of them, if the conditional in the for loop never
25630 fired). */
25631 if (end_reg > i + 2)
25632 arm_emit_vfp_multi_reg_pop (i + 2,
25633 (end_reg - (i + 2)) / 2,
25634 stack_pointer_rtx);
25635 }
25636
25637 if (TARGET_IWMMXT)
25638 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25639 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25640 {
25641 rtx_insn *insn;
25642 rtx addr = gen_rtx_MEM (V2SImode,
25643 gen_rtx_POST_INC (SImode,
25644 stack_pointer_rtx));
25645 set_mem_alias_set (addr, get_frame_alias_set ());
25646 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25647 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25648 gen_rtx_REG (V2SImode, i),
25649 NULL_RTX);
25650 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25651 stack_pointer_rtx, stack_pointer_rtx);
25652 }
25653
25654 if (saved_regs_mask)
25655 {
25656 rtx insn;
25657 bool return_in_pc = false;
25658
25659 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25660 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25661 && !IS_CMSE_ENTRY (func_type)
25662 && !IS_STACKALIGN (func_type)
25663 && really_return
25664 && crtl->args.pretend_args_size == 0
25665 && saved_regs_mask & (1 << LR_REGNUM)
25666 && !crtl->calls_eh_return)
25667 {
25668 saved_regs_mask &= ~(1 << LR_REGNUM);
25669 saved_regs_mask |= (1 << PC_REGNUM);
25670 return_in_pc = true;
25671 }
25672
25673 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25674 {
25675 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25676 if (saved_regs_mask & (1 << i))
25677 {
25678 rtx addr = gen_rtx_MEM (SImode,
25679 gen_rtx_POST_INC (SImode,
25680 stack_pointer_rtx));
25681 set_mem_alias_set (addr, get_frame_alias_set ());
25682
25683 if (i == PC_REGNUM)
25684 {
25685 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25686 XVECEXP (insn, 0, 0) = ret_rtx;
25687 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25688 addr);
25689 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25690 insn = emit_jump_insn (insn);
25691 }
25692 else
25693 {
25694 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25695 addr));
25696 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25697 gen_rtx_REG (SImode, i),
25698 NULL_RTX);
25699 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25700 stack_pointer_rtx,
25701 stack_pointer_rtx);
25702 }
25703 }
25704 }
25705 else
25706 {
25707 if (TARGET_LDRD
25708 && current_tune->prefer_ldrd_strd
25709 && !optimize_function_for_size_p (cfun))
25710 {
25711 if (TARGET_THUMB2)
25712 thumb2_emit_ldrd_pop (saved_regs_mask);
25713 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25714 arm_emit_ldrd_pop (saved_regs_mask);
25715 else
25716 arm_emit_multi_reg_pop (saved_regs_mask);
25717 }
25718 else
25719 arm_emit_multi_reg_pop (saved_regs_mask);
25720 }
25721
25722 if (return_in_pc)
25723 return;
25724 }
25725
25726 amount
25727 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25728 if (amount)
25729 {
25730 int i, j;
25731 rtx dwarf = NULL_RTX;
25732 rtx_insn *tmp =
25733 emit_insn (gen_addsi3 (stack_pointer_rtx,
25734 stack_pointer_rtx,
25735 GEN_INT (amount)));
25736
25737 RTX_FRAME_RELATED_P (tmp) = 1;
25738
25739 if (cfun->machine->uses_anonymous_args)
25740 {
25741 /* Restore pretend args. Refer to arm_expand_prologue for how the
25742 pretend args are saved on the stack. */
25743 int num_regs = crtl->args.pretend_args_size / 4;
25744 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
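/* E.g. two pretend words give num_regs == 2, so the mask is
(0xf0 >> 2) & 0xf = 0xc, i.e. r2 and r3, the argument registers whose
pretend-arg slots are discarded by the stack adjustment above. */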
25745 for (j = 0, i = 0; j < num_regs; i++)
25746 if (saved_regs_mask & (1 << i))
25747 {
25748 rtx reg = gen_rtx_REG (SImode, i);
25749 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25750 j++;
25751 }
25752 REG_NOTES (tmp) = dwarf;
25753 }
25754 arm_add_cfa_adjust_cfa_note (tmp, amount,
25755 stack_pointer_rtx, stack_pointer_rtx);
25756 }
25757
25758 /* Clear all caller-saved regs that are not used to return. */
25759 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25760 {
25761 /* CMSE_ENTRY always returns. */
25762 gcc_assert (really_return);
25763 cmse_nonsecure_entry_clear_before_return ();
25764 }
25765
25766 if (!really_return)
25767 return;
25768
25769 if (crtl->calls_eh_return)
25770 emit_insn (gen_addsi3 (stack_pointer_rtx,
25771 stack_pointer_rtx,
25772 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25773
25774 if (IS_STACKALIGN (func_type))
25775 /* Restore the original stack pointer. Before prologue, the stack was
25776 realigned and the original stack pointer saved in r0. For details,
25777 see comment in arm_expand_prologue. */
25778 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25779
25780 emit_jump_insn (simple_return_rtx);
25781 }
25782
25783 /* Implementation of insn prologue_thumb1_interwork. This is the first
25784 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25785
25786 const char *
25787 thumb1_output_interwork (void)
25788 {
25789 const char * name;
25790 FILE *f = asm_out_file;
25791
25792 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25793 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25794 == SYMBOL_REF);
25795 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25796
25797 /* Generate code sequence to switch us into Thumb mode. */
25798 /* The .code 32 directive has already been emitted by
25799 ASM_DECLARE_FUNCTION_NAME. */
25800 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25801 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
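/* In ARM state the PC reads as the address of the current instruction
plus 8, which here is the address of the Thumb stub emitted below;
setting the low bit makes the BX switch to Thumb state. */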
25802
25803 /* Generate a label, so that the debugger will notice the
25804 change in instruction sets. This label is also used by
25805 the assembler to bypass the ARM code when this function
25806 is called from a Thumb encoded function elsewhere in the
25807 same file. Hence the definition of STUB_NAME here must
25808 agree with the definition in gas/config/tc-arm.c. */
25809
25810 #define STUB_NAME ".real_start_of"
25811
25812 fprintf (f, "\t.code\t16\n");
25813 #ifdef ARM_PE
25814 if (arm_dllexport_name_p (name))
25815 name = arm_strip_name_encoding (name);
25816 #endif
25817 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25818 fprintf (f, "\t.thumb_func\n");
25819 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25820
25821 return "";
25822 }
25823
25824 /* Handle the case of a double word load into a low register from
25825 a computed memory address. The computed address may involve a
25826 register which is overwritten by the load. */
25827 const char *
25828 thumb_load_double_from_address (rtx *operands)
25829 {
25830 rtx addr;
25831 rtx base;
25832 rtx offset;
25833 rtx arg1;
25834 rtx arg2;
25835
25836 gcc_assert (REG_P (operands[0]));
25837 gcc_assert (MEM_P (operands[1]));
25838
25839 /* Get the memory address. */
25840 addr = XEXP (operands[1], 0);
25841
25842 /* Work out how the memory address is computed. */
25843 switch (GET_CODE (addr))
25844 {
25845 case REG:
25846 operands[2] = adjust_address (operands[1], SImode, 4);
25847
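/* If the destination overlaps the base register, load the high word
first so that the base is not clobbered before the second load. */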
25848 if (REGNO (operands[0]) == REGNO (addr))
25849 {
25850 output_asm_insn ("ldr\t%H0, %2", operands);
25851 output_asm_insn ("ldr\t%0, %1", operands);
25852 }
25853 else
25854 {
25855 output_asm_insn ("ldr\t%0, %1", operands);
25856 output_asm_insn ("ldr\t%H0, %2", operands);
25857 }
25858 break;
25859
25860 case CONST:
25861 /* Compute <address> + 4 for the high order load. */
25862 operands[2] = adjust_address (operands[1], SImode, 4);
25863
25864 output_asm_insn ("ldr\t%0, %1", operands);
25865 output_asm_insn ("ldr\t%H0, %2", operands);
25866 break;
25867
25868 case PLUS:
25869 arg1 = XEXP (addr, 0);
25870 arg2 = XEXP (addr, 1);
25871
25872 if (CONSTANT_P (arg1))
25873 base = arg2, offset = arg1;
25874 else
25875 base = arg1, offset = arg2;
25876
25877 gcc_assert (REG_P (base));
25878
25879 /* Catch the case of <address> = <reg> + <reg> */
25880 if (REG_P (offset))
25881 {
25882 int reg_offset = REGNO (offset);
25883 int reg_base = REGNO (base);
25884 int reg_dest = REGNO (operands[0]);
25885
25886 /* Add the base and offset registers together into the
25887 higher destination register. */
25888 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25889 reg_dest + 1, reg_base, reg_offset);
25890
25891 /* Load the lower destination register from the address in
25892 the higher destination register. */
25893 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25894 reg_dest, reg_dest + 1);
25895
25896 /* Load the higher destination register from its own address
25897 plus 4. */
25898 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25899 reg_dest + 1, reg_dest + 1);
25900 }
25901 else
25902 {
25903 /* Compute <address> + 4 for the high order load. */
25904 operands[2] = adjust_address (operands[1], SImode, 4);
25905
25906 /* If the computed address is held in the low order register
25907 then load the high order register first, otherwise always
25908 load the low order register first. */
25909 if (REGNO (operands[0]) == REGNO (base))
25910 {
25911 output_asm_insn ("ldr\t%H0, %2", operands);
25912 output_asm_insn ("ldr\t%0, %1", operands);
25913 }
25914 else
25915 {
25916 output_asm_insn ("ldr\t%0, %1", operands);
25917 output_asm_insn ("ldr\t%H0, %2", operands);
25918 }
25919 }
25920 break;
25921
25922 case LABEL_REF:
25923 /* With no registers to worry about we can just load the value
25924 directly. */
25925 operands[2] = adjust_address (operands[1], SImode, 4);
25926
25927 output_asm_insn ("ldr\t%H0, %2", operands);
25928 output_asm_insn ("ldr\t%0, %1", operands);
25929 break;
25930
25931 default:
25932 gcc_unreachable ();
25933 }
25934
25935 return "";
25936 }
25937
25938 const char *
25939 thumb_output_move_mem_multiple (int n, rtx *operands)
25940 {
25941 switch (n)
25942 {
25943 case 2:
25944 if (REGNO (operands[4]) > REGNO (operands[5]))
25945 std::swap (operands[4], operands[5]);
25946
25947 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25948 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25949 break;
25950
25951 case 3:
25952 if (REGNO (operands[4]) > REGNO (operands[5]))
25953 std::swap (operands[4], operands[5]);
25954 if (REGNO (operands[5]) > REGNO (operands[6]))
25955 std::swap (operands[5], operands[6]);
25956 if (REGNO (operands[4]) > REGNO (operands[5]))
25957 std::swap (operands[4], operands[5]);
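/* The three exchanges above leave the scratch registers in ascending
order, so the ldmia/stmia register lists below are written lowest
register first (the lowest-numbered register transfers the lowest
address). */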
25958
25959 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25960 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25961 break;
25962
25963 default:
25964 gcc_unreachable ();
25965 }
25966
25967 return "";
25968 }
25969
25970 /* Output a call-via instruction for thumb state. */
25971 const char *
25972 thumb_call_via_reg (rtx reg)
25973 {
25974 int regno = REGNO (reg);
25975 rtx *labelp;
25976
25977 gcc_assert (regno < LR_REGNUM);
25978
25979 /* If we are in the normal text section we can use a single instance
25980 per compilation unit. If we are doing function sections, then we need
25981 an entry per section, since we can't rely on reachability. */
25982 if (in_section == text_section)
25983 {
25984 thumb_call_reg_needed = 1;
25985
25986 if (thumb_call_via_label[regno] == NULL)
25987 thumb_call_via_label[regno] = gen_label_rtx ();
25988 labelp = thumb_call_via_label + regno;
25989 }
25990 else
25991 {
25992 if (cfun->machine->call_via[regno] == NULL)
25993 cfun->machine->call_via[regno] = gen_label_rtx ();
25994 labelp = cfun->machine->call_via + regno;
25995 }
25996
25997 output_asm_insn ("bl\t%a0", labelp);
25998 return "";
25999 }
26000
26001 /* Routines for generating rtl. */
26002 void
26003 thumb_expand_movmemqi (rtx *operands)
26004 {
26005 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26006 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26007 HOST_WIDE_INT len = INTVAL (operands[2]);
26008 HOST_WIDE_INT offset = 0;
26009
26010 while (len >= 12)
26011 {
26012 emit_insn (gen_movmem12b (out, in, out, in));
26013 len -= 12;
26014 }
26015
26016 if (len >= 8)
26017 {
26018 emit_insn (gen_movmem8b (out, in, out, in));
26019 len -= 8;
26020 }
26021
26022 if (len >= 4)
26023 {
26024 rtx reg = gen_reg_rtx (SImode);
26025 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26026 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26027 len -= 4;
26028 offset += 4;
26029 }
26030
26031 if (len >= 2)
26032 {
26033 rtx reg = gen_reg_rtx (HImode);
26034 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26035 plus_constant (Pmode, in,
26036 offset))));
26037 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26038 offset)),
26039 reg));
26040 len -= 2;
26041 offset += 2;
26042 }
26043
26044 if (len)
26045 {
26046 rtx reg = gen_reg_rtx (QImode);
26047 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26048 plus_constant (Pmode, in,
26049 offset))));
26050 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26051 offset)),
26052 reg));
26053 }
26054 }
26055
26056 void
26057 thumb_reload_out_hi (rtx *operands)
26058 {
26059 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26060 }
26061
26062 /* Return the length of a function name prefix
26063 that starts with the character 'c'. */
26064 static int
26065 arm_get_strip_length (int c)
26066 {
26067 switch (c)
26068 {
26069 ARM_NAME_ENCODING_LENGTHS
26070 default: return 0;
26071 }
26072 }
26073
26074 /* Return a pointer to a function's name with any
26075 and all prefix encodings stripped from it. */
26076 const char *
26077 arm_strip_name_encoding (const char *name)
26078 {
26079 int skip;
26080
26081 while ((skip = arm_get_strip_length (* name)))
26082 name += skip;
26083
26084 return name;
26085 }
26086
26087 /* If there is a '*' anywhere in the name's prefix, then
26088 emit the stripped name verbatim, otherwise prepend an
26089 underscore if leading underscores are being used. */
26090 void
26091 arm_asm_output_labelref (FILE *stream, const char *name)
26092 {
26093 int skip;
26094 int verbatim = 0;
26095
26096 while ((skip = arm_get_strip_length (* name)))
26097 {
26098 verbatim |= (*name == '*');
26099 name += skip;
26100 }
26101
26102 if (verbatim)
26103 fputs (name, stream);
26104 else
26105 asm_fprintf (stream, "%U%s", name);
26106 }
26107
26108 /* This function is used to emit an EABI tag and its associated value.
26109 We emit the numerical value of the tag in case the assembler does not
26110 support textual tags (e.g. gas prior to 2.20). If requested we include
26111 the tag name in a comment so that anyone reading the assembler output
26112 will know which tag is being set.
26113
26114 This function is not static because arm-c.c needs it too. */
26115
26116 void
26117 arm_emit_eabi_attribute (const char *name, int num, int val)
26118 {
26119 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26120 if (flag_verbose_asm || flag_debug_asm)
26121 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26122 asm_fprintf (asm_out_file, "\n");
26123 }
26124
26125 /* This function is used to print CPU tuning information as comment
26126 in assembler file. Pointers are not printed for now. */
26127
26128 void
26129 arm_print_tune_info (void)
26130 {
26131 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26132 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26133 current_tune->constant_limit);
26134 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26135 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26136 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26137 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26138 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26139 "prefetch.l1_cache_size:\t%d\n",
26140 current_tune->prefetch.l1_cache_size);
26141 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26142 "prefetch.l1_cache_line_size:\t%d\n",
26143 current_tune->prefetch.l1_cache_line_size);
26144 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26145 "prefer_constant_pool:\t%d\n",
26146 (int) current_tune->prefer_constant_pool);
26147 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26148 "branch_cost:\t(s:speed, p:predictable)\n");
26149 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26150 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26151 current_tune->branch_cost (false, false));
26152 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26153 current_tune->branch_cost (false, true));
26154 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26155 current_tune->branch_cost (true, false));
26156 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26157 current_tune->branch_cost (true, true));
26158 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26159 "prefer_ldrd_strd:\t%d\n",
26160 (int) current_tune->prefer_ldrd_strd);
26161 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26162 "logical_op_non_short_circuit:\t[%d,%d]\n",
26163 (int) current_tune->logical_op_non_short_circuit_thumb,
26164 (int) current_tune->logical_op_non_short_circuit_arm);
26165 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26166 "prefer_neon_for_64bits:\t%d\n",
26167 (int) current_tune->prefer_neon_for_64bits);
26168 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26169 "disparage_flag_setting_t16_encodings:\t%d\n",
26170 (int) current_tune->disparage_flag_setting_t16_encodings);
26171 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26172 "string_ops_prefer_neon:\t%d\n",
26173 (int) current_tune->string_ops_prefer_neon);
26174 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26175 "max_insns_inline_memset:\t%d\n",
26176 current_tune->max_insns_inline_memset);
26177 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26178 current_tune->fusible_ops);
26179 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26180 (int) current_tune->sched_autopref);
26181 }
26182
26183 /* Print .arch and .arch_extension directives corresponding to the
26184 current architecture configuration. */
26185 static void
26186 arm_print_asm_arch_directives ()
26187 {
26188 const arch_option *arch
26189 = arm_parse_arch_option_name (all_architectures, "-march",
26190 arm_active_target.arch_name);
26191 auto_sbitmap opt_bits (isa_num_bits);
26192
26193 gcc_assert (arch);
26194
26195 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26196 if (!arch->common.extensions)
26197 return;
26198
26199 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26200 opt->name != NULL;
26201 opt++)
26202 {
26203 if (!opt->remove)
26204 {
26205 arm_initialize_isa (opt_bits, opt->isa_bits);
26206
26207 /* If every feature bit of this option is set in the target
26208 ISA specification, print out the option name. However,
26209 don't print anything if all the bits are part of the
26210 FPU specification. */
26211 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26212 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26213 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26214 }
26215 }
26216 }
26217
26218 static void
26219 arm_file_start (void)
26220 {
26221 int val;
26222
26223 if (TARGET_BPABI)
26224 {
26225 /* We don't have a specified CPU. Use the architecture to
26226 generate the tags.
26227
26228 Note: it might be better to do this unconditionally, then the
26229 assembler would not need to know about all new CPU names as
26230 they are added. */
26231 if (!arm_active_target.core_name)
26232 {
26233 /* armv7ve doesn't support any extensions. */
26234 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26235 {
26236 /* Keep backward compatibility for assemblers
26237 which don't support armv7ve. */
26238 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26239 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26240 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26241 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26242 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26243 }
26244 else
26245 arm_print_asm_arch_directives ();
26246 }
26247 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26248 asm_fprintf (asm_out_file, "\t.arch %s\n",
26249 arm_active_target.core_name + 8);
26250 else
26251 {
26252 const char* truncated_name
26253 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26254 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26255 }
26256
26257 if (print_tune_info)
26258 arm_print_tune_info ();
26259
26260 if (! TARGET_SOFT_FLOAT)
26261 {
26262 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26263 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26264
26265 if (TARGET_HARD_FLOAT_ABI)
26266 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26267 }
26268
26269 /* Some of these attributes only apply when the corresponding features
26270 are used. However we don't have any easy way of figuring this out.
26271 Conservatively record the setting that would have been used. */
26272
26273 if (flag_rounding_math)
26274 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26275
26276 if (!flag_unsafe_math_optimizations)
26277 {
26278 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26279 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26280 }
26281 if (flag_signaling_nans)
26282 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26283
26284 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26285 flag_finite_math_only ? 1 : 3);
26286
26287 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26288 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26289 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26290 flag_short_enums ? 1 : 2);
26291
26292 /* Tag_ABI_optimization_goals. */
26293 if (optimize_size)
26294 val = 4;
26295 else if (optimize >= 2)
26296 val = 2;
26297 else if (optimize)
26298 val = 1;
26299 else
26300 val = 6;
26301 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26302
26303 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26304 unaligned_access);
26305
26306 if (arm_fp16_format)
26307 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26308 (int) arm_fp16_format);
26309
26310 if (arm_lang_output_object_attributes_hook)
26311 arm_lang_output_object_attributes_hook();
26312 }
26313
26314 default_file_start ();
26315 }
26316
26317 static void
26318 arm_file_end (void)
26319 {
26320 int regno;
26321
26322 if (NEED_INDICATE_EXEC_STACK)
26323 /* Add .note.GNU-stack. */
26324 file_end_indicate_exec_stack ();
26325
26326 if (! thumb_call_reg_needed)
26327 return;
26328
26329 switch_to_section (text_section);
26330 asm_fprintf (asm_out_file, "\t.code 16\n");
26331 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26332
26333 for (regno = 0; regno < LR_REGNUM; regno++)
26334 {
26335 rtx label = thumb_call_via_label[regno];
26336
26337 if (label != 0)
26338 {
26339 targetm.asm_out.internal_label (asm_out_file, "L",
26340 CODE_LABEL_NUMBER (label));
26341 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26342 }
26343 }
26344 }
26345
26346 #ifndef ARM_PE
26347 /* Symbols in the text segment can be accessed without indirecting via the
26348 constant pool; it may take an extra binary operation, but this is still
26349 faster than indirecting via memory. Don't do this when not optimizing,
26350 since we won't be calculating all of the offsets necessary to do this
26351 simplification. */
26352
26353 static void
26354 arm_encode_section_info (tree decl, rtx rtl, int first)
26355 {
26356 if (optimize > 0 && TREE_CONSTANT (decl))
26357 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26358
26359 default_encode_section_info (decl, rtl, first);
26360 }
26361 #endif /* !ARM_PE */
26362
26363 static void
26364 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26365 {
26366 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26367 && !strcmp (prefix, "L"))
26368 {
26369 arm_ccfsm_state = 0;
26370 arm_target_insn = NULL;
26371 }
26372 default_internal_label (stream, prefix, labelno);
26373 }
26374
26375 /* Output code to add DELTA to the first argument, and then jump
26376 to FUNCTION. Used for C++ multiple inheritance. */
26377
26378 static void
26379 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26380 HOST_WIDE_INT, tree function)
26381 {
26382 static int thunk_label = 0;
26383 char label[256];
26384 char labelpc[256];
26385 int mi_delta = delta;
26386 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26387 int shift = 0;
26388 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26389 ? 1 : 0);
26390 if (mi_delta < 0)
26391 mi_delta = - mi_delta;
26392
26393 final_start_function (emit_barrier (), file, 1);
26394
26395 if (TARGET_THUMB1)
26396 {
26397 int labelno = thunk_label++;
26398 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26399 /* Thunks are entered in arm mode when available. */
26400 if (TARGET_THUMB1_ONLY)
26401 {
26402 /* push r3 so we can use it as a temporary. */
26403 /* TODO: Omit this save if r3 is not used. */
26404 fputs ("\tpush {r3}\n", file);
26405 fputs ("\tldr\tr3, ", file);
26406 }
26407 else
26408 {
26409 fputs ("\tldr\tr12, ", file);
26410 }
26411 assemble_name (file, label);
26412 fputc ('\n', file);
26413 if (flag_pic)
26414 {
26415 /* If we are generating PIC, the ldr instruction below loads
26416 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26417 the address of the add + 8, so we have:
26418
26419 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26420 = target + 1.
26421
26422 Note that we have "+ 1" because some versions of GNU ld
26423 don't set the low bit of the result for R_ARM_REL32
26424 relocations against thumb function symbols.
26425 On ARMv6M this is +4, not +8. */
26426 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26427 assemble_name (file, labelpc);
26428 fputs (":\n", file);
26429 if (TARGET_THUMB1_ONLY)
26430 {
26431 /* This is 2 insns after the start of the thunk, so we know it
26432 is 4-byte aligned. */
26433 fputs ("\tadd\tr3, pc, r3\n", file);
26434 fputs ("\tmov r12, r3\n", file);
26435 }
26436 else
26437 fputs ("\tadd\tr12, pc, r12\n", file);
26438 }
26439 else if (TARGET_THUMB1_ONLY)
26440 fputs ("\tmov r12, r3\n", file);
26441 }
26442 if (TARGET_THUMB1_ONLY)
26443 {
26444 if (mi_delta > 255)
26445 {
26446 fputs ("\tldr\tr3, ", file);
26447 assemble_name (file, label);
26448 fputs ("+4\n", file);
26449 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26450 mi_op, this_regno, this_regno);
26451 }
26452 else if (mi_delta != 0)
26453 {
26454 /* Thumb1 unified syntax requires an s suffix in the instruction name when
26455 one of the operands is an immediate. */
26456 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26457 mi_op, this_regno, this_regno,
26458 mi_delta);
26459 }
26460 }
26461 else
26462 {
26463 /* TODO: Use movw/movt for large constants when available. */
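/* Add the delta eight bits at a time, with each chunk starting at an
even bit position, so every add/sub uses a valid ARM rotated-immediate
operand. */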
26464 while (mi_delta != 0)
26465 {
26466 if ((mi_delta & (3 << shift)) == 0)
26467 shift += 2;
26468 else
26469 {
26470 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26471 mi_op, this_regno, this_regno,
26472 mi_delta & (0xff << shift));
26473 mi_delta &= ~(0xff << shift);
26474 shift += 8;
26475 }
26476 }
26477 }
26478 if (TARGET_THUMB1)
26479 {
26480 if (TARGET_THUMB1_ONLY)
26481 fputs ("\tpop\t{r3}\n", file);
26482
26483 fprintf (file, "\tbx\tr12\n");
26484 ASM_OUTPUT_ALIGN (file, 2);
26485 assemble_name (file, label);
26486 fputs (":\n", file);
26487 if (flag_pic)
26488 {
26489 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26490 rtx tem = XEXP (DECL_RTL (function), 0);
26491 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26492 pipeline offset is four rather than eight. Adjust the offset
26493 accordingly. */
26494 tem = plus_constant (GET_MODE (tem), tem,
26495 TARGET_THUMB1_ONLY ? -3 : -7);
26496 tem = gen_rtx_MINUS (GET_MODE (tem),
26497 tem,
26498 gen_rtx_SYMBOL_REF (Pmode,
26499 ggc_strdup (labelpc)));
26500 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26501 }
26502 else
26503 /* Output ".word .LTHUNKn". */
26504 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26505
26506 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26507 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26508 }
26509 else
26510 {
26511 fputs ("\tb\t", file);
26512 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26513 if (NEED_PLT_RELOC)
26514 fputs ("(PLT)", file);
26515 fputc ('\n', file);
26516 }
26517
26518 final_end_function ();
26519 }
26520
26521 /* MI thunk handling for TARGET_32BIT. */
26522
26523 static void
26524 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26525 HOST_WIDE_INT vcall_offset, tree function)
26526 {
26527 /* On ARM, this_regno is R0 or R1 depending on
26528 whether the function returns an aggregate or not.
26529 */
26530 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26531 function)
26532 ? R1_REGNUM : R0_REGNUM);
26533
26534 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26535 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26536 reload_completed = 1;
26537 emit_note (NOTE_INSN_PROLOGUE_END);
26538
26539 /* Add DELTA to THIS_RTX. */
26540 if (delta != 0)
26541 arm_split_constant (PLUS, Pmode, NULL_RTX,
26542 delta, this_rtx, this_rtx, false);
26543
26544 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26545 if (vcall_offset != 0)
26546 {
26547 /* Load *THIS_RTX. */
26548 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26549 /* Compute *THIS_RTX + VCALL_OFFSET. */
26550 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26551 false);
26552 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26553 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26554 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26555 }
26556
26557 /* Generate a tail call to the target function. */
26558 if (!TREE_USED (function))
26559 {
26560 assemble_external (function);
26561 TREE_USED (function) = 1;
26562 }
26563 rtx funexp = XEXP (DECL_RTL (function), 0);
26564 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26565 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26566 SIBLING_CALL_P (insn) = 1;
26567
26568 insn = get_insns ();
26569 shorten_branches (insn);
26570 final_start_function (insn, file, 1);
26571 final (insn, file, 1);
26572 final_end_function ();
26573
26574 /* Stop pretending this is a post-reload pass. */
26575 reload_completed = 0;
26576 }
26577
26578 /* Output code to add DELTA to the first argument, and then jump
26579 to FUNCTION. Used for C++ multiple inheritance. */
26580
26581 static void
26582 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26583 HOST_WIDE_INT vcall_offset, tree function)
26584 {
26585 if (TARGET_32BIT)
26586 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26587 else
26588 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26589 }
26590
26591 int
26592 arm_emit_vector_const (FILE *file, rtx x)
26593 {
26594 int i;
26595 const char * pattern;
26596
26597 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26598
26599 switch (GET_MODE (x))
26600 {
26601 case E_V2SImode: pattern = "%08x"; break;
26602 case E_V4HImode: pattern = "%04x"; break;
26603 case E_V8QImode: pattern = "%02x"; break;
26604 default: gcc_unreachable ();
26605 }
26606
26607 fprintf (file, "0x");
26608 for (i = CONST_VECTOR_NUNITS (x); i--;)
26609 {
26610 rtx element;
26611
26612 element = CONST_VECTOR_ELT (x, i);
26613 fprintf (file, pattern, INTVAL (element));
26614 }
26615
26616 return 1;
26617 }
26618
26619 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26620 HFmode constant pool entries are actually loaded with ldr. */
26621 void
26622 arm_emit_fp16_const (rtx c)
26623 {
26624 long bits;
26625
26626 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26627 if (WORDS_BIG_ENDIAN)
26628 assemble_zeros (2);
26629 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26630 if (!WORDS_BIG_ENDIAN)
26631 assemble_zeros (2);
26632 }
26633
26634 const char *
26635 arm_output_load_gr (rtx *operands)
26636 {
26637 rtx reg;
26638 rtx offset;
26639 rtx wcgr;
26640 rtx sum;
26641
26642 if (!MEM_P (operands [1])
26643 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26644 || !REG_P (reg = XEXP (sum, 0))
26645 || !CONST_INT_P (offset = XEXP (sum, 1))
26646 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26647 return "wldrw%?\t%0, %1";
26648
26649 /* Fix up an out-of-range load of a GR register. */
26650 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26651 wcgr = operands[0];
26652 operands[0] = reg;
26653 output_asm_insn ("ldr%?\t%0, %1", operands);
26654
26655 operands[0] = wcgr;
26656 operands[1] = reg;
26657 output_asm_insn ("tmcr%?\t%0, %1", operands);
26658 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26659
26660 return "";
26661 }
26662
26663 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26664
26665 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26666 named arg and all anonymous args onto the stack.
26667 XXX I know the prologue shouldn't be pushing registers, but it is faster
26668 that way. */
26669
26670 static void
26671 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26672 machine_mode mode,
26673 tree type,
26674 int *pretend_size,
26675 int second_time ATTRIBUTE_UNUSED)
26676 {
26677 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26678 int nregs;
26679
26680 cfun->machine->uses_anonymous_args = 1;
26681 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26682 {
26683 nregs = pcum->aapcs_ncrn;
26684 if (nregs & 1)
26685 {
26686 int res = arm_needs_doubleword_align (mode, type);
26687 if (res < 0 && warn_psabi)
26688 inform (input_location, "parameter passing for argument of "
26689 "type %qT changed in GCC 7.1", type);
26690 else if (res > 0)
26691 nregs++;
26692 }
26693 }
26694 else
26695 nregs = pcum->nregs;
26696
26697 if (nregs < NUM_ARG_REGS)
26698 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26699 }
26700
26701 /* We can't rely on the caller doing the proper promotion when
26702 using APCS or ATPCS. */
26703
26704 static bool
26705 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26706 {
26707 return !TARGET_AAPCS_BASED;
26708 }
26709
26710 static machine_mode
26711 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26712 machine_mode mode,
26713 int *punsignedp ATTRIBUTE_UNUSED,
26714 const_tree fntype ATTRIBUTE_UNUSED,
26715 int for_return ATTRIBUTE_UNUSED)
26716 {
26717 if (GET_MODE_CLASS (mode) == MODE_INT
26718 && GET_MODE_SIZE (mode) < 4)
26719 return SImode;
26720
26721 return mode;
26722 }
26723
26724
26725 static bool
26726 arm_default_short_enums (void)
26727 {
26728 return ARM_DEFAULT_SHORT_ENUMS;
26729 }
26730
26731
26732 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26733
26734 static bool
26735 arm_align_anon_bitfield (void)
26736 {
26737 return TARGET_AAPCS_BASED;
26738 }
26739
26740
26741 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26742
26743 static tree
26744 arm_cxx_guard_type (void)
26745 {
26746 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26747 }
26748
26749
26750 /* The EABI says test the least significant bit of a guard variable. */
26751
26752 static bool
26753 arm_cxx_guard_mask_bit (void)
26754 {
26755 return TARGET_AAPCS_BASED;
26756 }
26757
26758
26759 /* The EABI specifies that all array cookies are 8 bytes long. */
26760
26761 static tree
26762 arm_get_cookie_size (tree type)
26763 {
26764 tree size;
26765
26766 if (!TARGET_AAPCS_BASED)
26767 return default_cxx_get_cookie_size (type);
26768
26769 size = build_int_cst (sizetype, 8);
26770 return size;
26771 }
26772
26773
26774 /* The EABI says that array cookies should also contain the element size. */
26775
26776 static bool
26777 arm_cookie_has_size (void)
26778 {
26779 return TARGET_AAPCS_BASED;
26780 }
26781
26782
26783 /* The EABI says constructors and destructors should return a pointer to
26784 the object constructed/destroyed. */
26785
26786 static bool
26787 arm_cxx_cdtor_returns_this (void)
26788 {
26789 return TARGET_AAPCS_BASED;
26790 }
26791
26792 /* The EABI says that an inline function may never be the key
26793 method. */
26794
26795 static bool
26796 arm_cxx_key_method_may_be_inline (void)
26797 {
26798 return !TARGET_AAPCS_BASED;
26799 }
26800
26801 static void
26802 arm_cxx_determine_class_data_visibility (tree decl)
26803 {
26804 if (!TARGET_AAPCS_BASED
26805 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26806 return;
26807
26808 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26809 is exported. However, on systems without dynamic vague linkage,
26810 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26811 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26812 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26813 else
26814 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26815 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26816 }
26817
26818 static bool
26819 arm_cxx_class_data_always_comdat (void)
26820 {
26821 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26822 vague linkage if the class has no key function. */
26823 return !TARGET_AAPCS_BASED;
26824 }
26825
26826
26827 /* The EABI says __aeabi_atexit should be used to register static
26828 destructors. */
26829
26830 static bool
26831 arm_cxx_use_aeabi_atexit (void)
26832 {
26833 return TARGET_AAPCS_BASED;
26834 }
26835
26836
26837 void
26838 arm_set_return_address (rtx source, rtx scratch)
26839 {
26840 arm_stack_offsets *offsets;
26841 HOST_WIDE_INT delta;
26842 rtx addr;
26843 unsigned long saved_regs;
26844
26845 offsets = arm_get_frame_offsets ();
26846 saved_regs = offsets->saved_regs_mask;
26847
26848 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26849 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26850 else
26851 {
26852 if (frame_pointer_needed)
26853 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26854 else
26855 {
26856 /* LR will be the first saved register. */
26857 delta = offsets->outgoing_args - (offsets->frame + 4);
26858
26859
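/* An ARM str can only encode an immediate offset of up to 4095 bytes, so
fold the 4K-aligned part of any larger offset into the scratch register
first. */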
26860 if (delta >= 4096)
26861 {
26862 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26863 GEN_INT (delta & ~4095)));
26864 addr = scratch;
26865 delta &= 4095;
26866 }
26867 else
26868 addr = stack_pointer_rtx;
26869
26870 addr = plus_constant (Pmode, addr, delta);
26871 }
26872 /* The store needs to be marked as frame related in order to prevent
26873 DSE from deleting it as dead if it is based on fp. */
26874 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26875 RTX_FRAME_RELATED_P (insn) = 1;
26876 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26877 }
26878 }
26879
26880
26881 void
26882 thumb_set_return_address (rtx source, rtx scratch)
26883 {
26884 arm_stack_offsets *offsets;
26885 HOST_WIDE_INT delta;
26886 HOST_WIDE_INT limit;
26887 int reg;
26888 rtx addr;
26889 unsigned long mask;
26890
26891 emit_use (source);
26892
26893 offsets = arm_get_frame_offsets ();
26894 mask = offsets->saved_regs_mask;
26895 if (mask & (1 << LR_REGNUM))
26896 {
26897 limit = 1024;
26898 /* Find the saved regs. */
26899 if (frame_pointer_needed)
26900 {
26901 delta = offsets->soft_frame - offsets->saved_args;
26902 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26903 if (TARGET_THUMB1)
26904 limit = 128;
26905 }
26906 else
26907 {
26908 delta = offsets->outgoing_args - offsets->saved_args;
26909 reg = SP_REGNUM;
26910 }
26911 /* Allow for the stack frame. */
26912 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26913 delta -= 16;
26914 /* The link register is always the first saved register. */
26915 delta -= 4;
26916
26917 /* Construct the address. */
26918 addr = gen_rtx_REG (SImode, reg);
26919 if (delta > limit)
26920 {
26921 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26922 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26923 addr = scratch;
26924 }
26925 else
26926 addr = plus_constant (Pmode, addr, delta);
26927
26928 /* The store needs to be marked as frame related in order to prevent
26929 DSE from deleting it as dead if it is based on fp. */
26930 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26931 RTX_FRAME_RELATED_P (insn) = 1;
26932 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26933 }
26934 else
26935 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26936 }
26937
26938 /* Implements target hook vector_mode_supported_p. */
26939 bool
26940 arm_vector_mode_supported_p (machine_mode mode)
26941 {
26942 /* Neon also supports V2SImode, etc. listed in the clause below. */
26943 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26944 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26945 || mode == V2DImode || mode == V8HFmode))
26946 return true;
26947
26948 if ((TARGET_NEON || TARGET_IWMMXT)
26949 && ((mode == V2SImode)
26950 || (mode == V4HImode)
26951 || (mode == V8QImode)))
26952 return true;
26953
26954 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26955 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26956 || mode == V2HAmode))
26957 return true;
26958
26959 return false;
26960 }
26961
26962 /* Implements target hook array_mode_supported_p. */
26963
26964 static bool
26965 arm_array_mode_supported_p (machine_mode mode,
26966 unsigned HOST_WIDE_INT nelems)
26967 {
26968 if (TARGET_NEON
26969 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26970 && (nelems >= 2 && nelems <= 4))
26971 return true;
26972
26973 return false;
26974 }
26975
26976 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26977 registers when autovectorizing for Neon, at least until multiple vector
26978 widths are supported properly by the middle-end. */
26979
26980 static machine_mode
26981 arm_preferred_simd_mode (scalar_mode mode)
26982 {
26983 if (TARGET_NEON)
26984 switch (mode)
26985 {
26986 case E_SFmode:
26987 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26988 case E_SImode:
26989 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26990 case E_HImode:
26991 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26992 case E_QImode:
26993 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26994 case E_DImode:
26995 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26996 return V2DImode;
26997 break;
26998
26999 default:;
27000 }
27001
27002 if (TARGET_REALLY_IWMMXT)
27003 switch (mode)
27004 {
27005 case E_SImode:
27006 return V2SImode;
27007 case E_HImode:
27008 return V4HImode;
27009 case E_QImode:
27010 return V8QImode;
27011
27012 default:;
27013 }
27014
27015 return word_mode;
27016 }
27017
27018 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27019
27020 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27021 using r0-r4 for function arguments, r7 for the stack frame, and not have
27022 enough left over to do doubleword arithmetic. For Thumb-2 all the
27023 potentially problematic instructions accept high registers so this is not
27024 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27025 that require many low registers. */
27026 static bool
27027 arm_class_likely_spilled_p (reg_class_t rclass)
27028 {
27029 if ((TARGET_THUMB1 && rclass == LO_REGS)
27030 || rclass == CC_REG)
27031 return true;
27032
27033 return false;
27034 }
27035
27036 /* Implements target hook small_register_classes_for_mode_p. */
27037 bool
27038 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27039 {
27040 return TARGET_THUMB1;
27041 }
27042
27043 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27044 ARM insns and therefore guarantee that the shift count is modulo 256.
27045 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27046 guarantee no particular behavior for out-of-range counts. */
27047
27048 static unsigned HOST_WIDE_INT
27049 arm_shift_truncation_mask (machine_mode mode)
27050 {
27051 return mode == SImode ? 255 : 0;
27052 }
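/* The middle-end may therefore assume that a variable SImode shift count
   is truncated to 8 bits, e.g. that masking the count with 255 beforehand
   is redundant, whereas DImode shifts make no such guarantee.  */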
27053
27054
27055 /* Map internal gcc register numbers to DWARF2 register numbers. */
27056
27057 unsigned int
27058 arm_dbx_register_number (unsigned int regno)
27059 {
27060 if (regno < 16)
27061 return regno;
27062
27063 if (IS_VFP_REGNUM (regno))
27064 {
27065 /* See comment in arm_dwarf_register_span. */
27066 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27067 return 64 + regno - FIRST_VFP_REGNUM;
27068 else
27069 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27070 }
27071
27072 if (IS_IWMMXT_GR_REGNUM (regno))
27073 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27074
27075 if (IS_IWMMXT_REGNUM (regno))
27076 return 112 + regno - FIRST_IWMMXT_REGNUM;
27077
27078 return DWARF_FRAME_REGISTERS;
27079 }
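/* Summarising the mapping above: core registers map to themselves
   (e.g. r5 -> 5), the single-precision-capable VFP registers use the
   legacy range starting at 64, the remaining double-precision registers
   use the range starting at 256, and the iWMMXt control and data
   registers map into the ranges starting at 104 and 112 respectively.  */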
27080
27081 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27082 GCC models them as 64 32-bit registers, so we need to describe this to
27083 the DWARF generation code. Other registers can use the default. */
27084 static rtx
27085 arm_dwarf_register_span (rtx rtl)
27086 {
27087 machine_mode mode;
27088 unsigned regno;
27089 rtx parts[16];
27090 int nregs;
27091 int i;
27092
27093 regno = REGNO (rtl);
27094 if (!IS_VFP_REGNUM (regno))
27095 return NULL_RTX;
27096
27097 /* XXX FIXME: The EABI defines two VFP register ranges:
27098 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27099 256-287: D0-D31
27100 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27101 corresponding D register. Until GDB supports this, we shall use the
27102 legacy encodings. We also use these encodings for D0-D15 for
27103 compatibility with older debuggers. */
27104 mode = GET_MODE (rtl);
27105 if (GET_MODE_SIZE (mode) < 8)
27106 return NULL_RTX;
27107
27108 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27109 {
27110 nregs = GET_MODE_SIZE (mode) / 4;
27111 for (i = 0; i < nregs; i += 2)
27112 if (TARGET_BIG_END)
27113 {
27114 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27115 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27116 }
27117 else
27118 {
27119 parts[i] = gen_rtx_REG (SImode, regno + i);
27120 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27121 }
27122 }
27123 else
27124 {
27125 nregs = GET_MODE_SIZE (mode) / 8;
27126 for (i = 0; i < nregs; i++)
27127 parts[i] = gen_rtx_REG (DImode, regno + i);
27128 }
27129
27130 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27131 }
27132
27133 #if ARM_UNWIND_INFO
27134 /* Emit unwind directives for a store-multiple instruction or stack pointer
27135 push during alignment.
27136 These should only ever be generated by the function prologue code, so
27137 expect them to have a particular form.
27138 The store-multiple instruction sometimes pushes pc as the last register,
27139 although it should not be tracked in the unwind information; for -Os it
27140 sometimes pushes dummy registers before the first register that needs
27141 to be tracked in the unwind information. Such dummy registers are there
27142 just to avoid a separate stack adjustment and will not be restored in
27143 the epilogue. */
27144
27145 static void
27146 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27147 {
27148 int i;
27149 HOST_WIDE_INT offset;
27150 HOST_WIDE_INT nregs;
27151 int reg_size;
27152 unsigned reg;
27153 unsigned lastreg;
27154 unsigned padfirst = 0, padlast = 0;
27155 rtx e;
27156
27157 e = XVECEXP (p, 0, 0);
27158 gcc_assert (GET_CODE (e) == SET);
27159
27160 /* First insn will adjust the stack pointer. */
27161 gcc_assert (GET_CODE (e) == SET
27162 && REG_P (SET_DEST (e))
27163 && REGNO (SET_DEST (e)) == SP_REGNUM
27164 && GET_CODE (SET_SRC (e)) == PLUS);
27165
27166 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27167 nregs = XVECLEN (p, 0) - 1;
27168 gcc_assert (nregs);
27169
27170 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27171 if (reg < 16)
27172 {
27173 /* For -Os dummy registers can be pushed at the beginning to
27174 avoid separate stack pointer adjustment. */
27175 e = XVECEXP (p, 0, 1);
27176 e = XEXP (SET_DEST (e), 0);
27177 if (GET_CODE (e) == PLUS)
27178 padfirst = INTVAL (XEXP (e, 1));
27179 gcc_assert (padfirst == 0 || optimize_size);
27180 /* The function prologue may also push pc, but does not annotate it,
27181 as it is never restored. We turn this into a stack pointer adjustment. */
27182 e = XVECEXP (p, 0, nregs);
27183 e = XEXP (SET_DEST (e), 0);
27184 if (GET_CODE (e) == PLUS)
27185 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27186 else
27187 padlast = offset - 4;
27188 gcc_assert (padlast == 0 || padlast == 4);
27189 if (padlast == 4)
27190 fprintf (asm_out_file, "\t.pad #4\n");
27191 reg_size = 4;
27192 fprintf (asm_out_file, "\t.save {");
27193 }
27194 else if (IS_VFP_REGNUM (reg))
27195 {
27196 reg_size = 8;
27197 fprintf (asm_out_file, "\t.vsave {");
27198 }
27199 else
27200 /* Unknown register type. */
27201 gcc_unreachable ();
27202
27203 /* If the stack increment doesn't match the size of the saved registers,
27204 something has gone horribly wrong. */
27205 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27206
27207 offset = padfirst;
27208 lastreg = 0;
27209 /* The remaining insns will describe the stores. */
27210 for (i = 1; i <= nregs; i++)
27211 {
27212 /* Expect (set (mem <addr>) (reg)).
27213 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27214 e = XVECEXP (p, 0, i);
27215 gcc_assert (GET_CODE (e) == SET
27216 && MEM_P (SET_DEST (e))
27217 && REG_P (SET_SRC (e)));
27218
27219 reg = REGNO (SET_SRC (e));
27220 gcc_assert (reg >= lastreg);
27221
27222 if (i != 1)
27223 fprintf (asm_out_file, ", ");
27224 /* We can't use %r for vfp because we need to use the
27225 double precision register names. */
27226 if (IS_VFP_REGNUM (reg))
27227 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27228 else
27229 asm_fprintf (asm_out_file, "%r", reg);
27230
27231 if (flag_checking)
27232 {
27233 /* Check that the addresses are consecutive. */
27234 e = XEXP (SET_DEST (e), 0);
27235 if (GET_CODE (e) == PLUS)
27236 gcc_assert (REG_P (XEXP (e, 0))
27237 && REGNO (XEXP (e, 0)) == SP_REGNUM
27238 && CONST_INT_P (XEXP (e, 1))
27239 && offset == INTVAL (XEXP (e, 1)));
27240 else
27241 gcc_assert (i == 1
27242 && REG_P (e)
27243 && REGNO (e) == SP_REGNUM);
27244 offset += reg_size;
27245 }
27246 }
27247 fprintf (asm_out_file, "}\n");
27248 if (padfirst)
27249 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27250 }
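/* For example, a prologue store-multiple such as
   "stmfd sp!, {r4, r5, lr}" is annotated as "\t.save {r4, r5, lr}",
   while a D-register push such as "vstmdb sp!, {d8, d9}" is annotated
   as "\t.vsave {d8, d9}"; any pc or dummy-register padding is turned
   into an additional "\t.pad #4" directive.  */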
27251
27252 /* Emit unwind directives for a SET. */
27253
27254 static void
27255 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27256 {
27257 rtx e0;
27258 rtx e1;
27259 unsigned reg;
27260
27261 e0 = XEXP (p, 0);
27262 e1 = XEXP (p, 1);
27263 switch (GET_CODE (e0))
27264 {
27265 case MEM:
27266 /* Pushing a single register. */
27267 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27268 || !REG_P (XEXP (XEXP (e0, 0), 0))
27269 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27270 abort ();
27271
27272 asm_fprintf (asm_out_file, "\t.save ");
27273 if (IS_VFP_REGNUM (REGNO (e1)))
27274 asm_fprintf(asm_out_file, "{d%d}\n",
27275 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27276 else
27277 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27278 break;
27279
27280 case REG:
27281 if (REGNO (e0) == SP_REGNUM)
27282 {
27283 /* A stack increment. */
27284 if (GET_CODE (e1) != PLUS
27285 || !REG_P (XEXP (e1, 0))
27286 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27287 || !CONST_INT_P (XEXP (e1, 1)))
27288 abort ();
27289
27290 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27291 -INTVAL (XEXP (e1, 1)));
27292 }
27293 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27294 {
27295 HOST_WIDE_INT offset;
27296
27297 if (GET_CODE (e1) == PLUS)
27298 {
27299 if (!REG_P (XEXP (e1, 0))
27300 || !CONST_INT_P (XEXP (e1, 1)))
27301 abort ();
27302 reg = REGNO (XEXP (e1, 0));
27303 offset = INTVAL (XEXP (e1, 1));
27304 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27305 HARD_FRAME_POINTER_REGNUM, reg,
27306 offset);
27307 }
27308 else if (REG_P (e1))
27309 {
27310 reg = REGNO (e1);
27311 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27312 HARD_FRAME_POINTER_REGNUM, reg);
27313 }
27314 else
27315 abort ();
27316 }
27317 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27318 {
27319 /* Move from sp to reg. */
27320 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27321 }
27322 else if (GET_CODE (e1) == PLUS
27323 && REG_P (XEXP (e1, 0))
27324 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27325 && CONST_INT_P (XEXP (e1, 1)))
27326 {
27327 /* Set reg to offset from sp. */
27328 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27329 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27330 }
27331 else
27332 abort ();
27333 break;
27334
27335 default:
27336 abort ();
27337 }
27338 }
27339
27340
27341 /* Emit unwind directives for the given insn. */
27342
27343 static void
27344 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27345 {
27346 rtx note, pat;
27347 bool handled_one = false;
27348
27349 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27350 return;
27351
27352 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27353 && (TREE_NOTHROW (current_function_decl)
27354 || crtl->all_throwers_are_sibcalls))
27355 return;
27356
27357 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27358 return;
27359
27360 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27361 {
27362 switch (REG_NOTE_KIND (note))
27363 {
27364 case REG_FRAME_RELATED_EXPR:
27365 pat = XEXP (note, 0);
27366 goto found;
27367
27368 case REG_CFA_REGISTER:
27369 pat = XEXP (note, 0);
27370 if (pat == NULL)
27371 {
27372 pat = PATTERN (insn);
27373 if (GET_CODE (pat) == PARALLEL)
27374 pat = XVECEXP (pat, 0, 0);
27375 }
27376
27377 /* Only emitted for IS_STACKALIGN re-alignment. */
27378 {
27379 rtx dest, src;
27380 unsigned reg;
27381
27382 src = SET_SRC (pat);
27383 dest = SET_DEST (pat);
27384
27385 gcc_assert (src == stack_pointer_rtx);
27386 reg = REGNO (dest);
27387 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27388 reg + 0x90, reg);
27389 }
27390 handled_one = true;
27391 break;
27392
27393 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27394 to get correct dwarf information for shrink-wrap. We should not
27395 emit unwind information for it because these notes are used either for
27396 pretend arguments or to adjust sp and restore registers from the
27397 stack. */
27398 case REG_CFA_DEF_CFA:
27399 case REG_CFA_ADJUST_CFA:
27400 case REG_CFA_RESTORE:
27401 return;
27402
27403 case REG_CFA_EXPRESSION:
27404 case REG_CFA_OFFSET:
27405 /* ??? Only handling here what we actually emit. */
27406 gcc_unreachable ();
27407
27408 default:
27409 break;
27410 }
27411 }
27412 if (handled_one)
27413 return;
27414 pat = PATTERN (insn);
27415 found:
27416
27417 switch (GET_CODE (pat))
27418 {
27419 case SET:
27420 arm_unwind_emit_set (asm_out_file, pat);
27421 break;
27422
27423 case SEQUENCE:
27424 /* Store multiple. */
27425 arm_unwind_emit_sequence (asm_out_file, pat);
27426 break;
27427
27428 default:
27429 abort();
27430 }
27431 }
27432
27433
27434 /* Output a reference from a function exception table to the type_info
27435 object X. The EABI specifies that the symbol should be relocated by
27436 an R_ARM_TARGET2 relocation. */
27437
27438 static bool
27439 arm_output_ttype (rtx x)
27440 {
27441 fputs ("\t.word\t", asm_out_file);
27442 output_addr_const (asm_out_file, x);
27443 /* Use special relocations for symbol references. */
27444 if (!CONST_INT_P (x))
27445 fputs ("(TARGET2)", asm_out_file);
27446 fputc ('\n', asm_out_file);
27447
27448 return TRUE;
27449 }
27450
27451 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27452
27453 static void
27454 arm_asm_emit_except_personality (rtx personality)
27455 {
27456 fputs ("\t.personality\t", asm_out_file);
27457 output_addr_const (asm_out_file, personality);
27458 fputc ('\n', asm_out_file);
27459 }
27460 #endif /* ARM_UNWIND_INFO */
27461
27462 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27463
27464 static void
27465 arm_asm_init_sections (void)
27466 {
27467 #if ARM_UNWIND_INFO
27468 exception_section = get_unnamed_section (0, output_section_asm_op,
27469 "\t.handlerdata");
27470 #endif /* ARM_UNWIND_INFO */
27471
27472 #ifdef OBJECT_FORMAT_ELF
27473 if (target_pure_code)
27474 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27475 #endif
27476 }
27477
27478 /* Output unwind directives for the start/end of a function. */
27479
27480 void
27481 arm_output_fn_unwind (FILE * f, bool prologue)
27482 {
27483 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27484 return;
27485
27486 if (prologue)
27487 fputs ("\t.fnstart\n", f);
27488 else
27489 {
27490 /* If this function will never be unwound, then mark it as such.
27491 The same condition is used in arm_unwind_emit to suppress
27492 the frame annotations. */
27493 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27494 && (TREE_NOTHROW (current_function_decl)
27495 || crtl->all_throwers_are_sibcalls))
27496 fputs("\t.cantunwind\n", f);
27497
27498 fputs ("\t.fnend\n", f);
27499 }
27500 }
27501
27502 static bool
27503 arm_emit_tls_decoration (FILE *fp, rtx x)
27504 {
27505 enum tls_reloc reloc;
27506 rtx val;
27507
27508 val = XVECEXP (x, 0, 0);
27509 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27510
27511 output_addr_const (fp, val);
27512
27513 switch (reloc)
27514 {
27515 case TLS_GD32:
27516 fputs ("(tlsgd)", fp);
27517 break;
27518 case TLS_LDM32:
27519 fputs ("(tlsldm)", fp);
27520 break;
27521 case TLS_LDO32:
27522 fputs ("(tlsldo)", fp);
27523 break;
27524 case TLS_IE32:
27525 fputs ("(gottpoff)", fp);
27526 break;
27527 case TLS_LE32:
27528 fputs ("(tpoff)", fp);
27529 break;
27530 case TLS_DESCSEQ:
27531 fputs ("(tlsdesc)", fp);
27532 break;
27533 default:
27534 gcc_unreachable ();
27535 }
27536
27537 switch (reloc)
27538 {
27539 case TLS_GD32:
27540 case TLS_LDM32:
27541 case TLS_IE32:
27542 case TLS_DESCSEQ:
27543 fputs (" + (. - ", fp);
27544 output_addr_const (fp, XVECEXP (x, 0, 2));
27545 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27546 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27547 output_addr_const (fp, XVECEXP (x, 0, 3));
27548 fputc (')', fp);
27549 break;
27550 default:
27551 break;
27552 }
27553
27554 return TRUE;
27555 }
27556
27557 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27558
27559 static void
27560 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27561 {
27562 gcc_assert (size == 4);
27563 fputs ("\t.word\t", file);
27564 output_addr_const (file, x);
27565 fputs ("(tlsldo)", file);
27566 }
27567
27568 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27569
27570 static bool
27571 arm_output_addr_const_extra (FILE *fp, rtx x)
27572 {
27573 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27574 return arm_emit_tls_decoration (fp, x);
27575 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27576 {
27577 char label[256];
27578 int labelno = INTVAL (XVECEXP (x, 0, 0));
27579
27580 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27581 assemble_name_raw (fp, label);
27582
27583 return TRUE;
27584 }
27585 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27586 {
27587 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27588 if (GOT_PCREL)
27589 fputs ("+.", fp);
27590 fputs ("-(", fp);
27591 output_addr_const (fp, XVECEXP (x, 0, 0));
27592 fputc (')', fp);
27593 return TRUE;
27594 }
27595 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27596 {
27597 output_addr_const (fp, XVECEXP (x, 0, 0));
27598 if (GOT_PCREL)
27599 fputs ("+.", fp);
27600 fputs ("-(", fp);
27601 output_addr_const (fp, XVECEXP (x, 0, 1));
27602 fputc (')', fp);
27603 return TRUE;
27604 }
27605 else if (GET_CODE (x) == CONST_VECTOR)
27606 return arm_emit_vector_const (fp, x);
27607
27608 return FALSE;
27609 }
27610
27611 /* Output assembly for a shift instruction.
27612 SET_FLAGS determines how the instruction modifies the condition codes.
27613 0 - Do not set condition codes.
27614 1 - Set condition codes.
27615 2 - Use smallest instruction. */
27616 const char *
27617 arm_output_shift(rtx * operands, int set_flags)
27618 {
27619 char pattern[100];
27620 static const char flag_chars[3] = {'?', '.', '!'};
27621 const char *shift;
27622 HOST_WIDE_INT val;
27623 char c;
27624
27625 c = flag_chars[set_flags];
27626 shift = shift_op(operands[3], &val);
27627 if (shift)
27628 {
27629 if (val != -1)
27630 operands[2] = GEN_INT(val);
27631 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27632 }
27633 else
27634 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27635
27636 output_asm_insn (pattern, operands);
27637 return "";
27638 }
27639
27640 /* Output assembly for a WMMX immediate shift instruction. */
27641 const char *
27642 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27643 {
27644 int shift = INTVAL (operands[2]);
27645 char templ[50];
27646 machine_mode opmode = GET_MODE (operands[0]);
27647
27648 gcc_assert (shift >= 0);
27649
27650 /* Handle shift values larger than the type width: > 63 (for the D
27651 qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27652 if (((opmode == V4HImode) && (shift > 15))
27653 || ((opmode == V2SImode) && (shift > 31))
27654 || ((opmode == DImode) && (shift > 63)))
27655 {
27656 if (wror_or_wsra)
27657 {
27658 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27659 output_asm_insn (templ, operands);
27660 if (opmode == DImode)
27661 {
27662 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27663 output_asm_insn (templ, operands);
27664 }
27665 }
27666 else
27667 {
27668 /* The destination register will contain all zeros. */
27669 sprintf (templ, "wzero\t%%0");
27670 output_asm_insn (templ, operands);
27671 }
27672 return "";
27673 }
27674
27675 if ((opmode == DImode) && (shift > 32))
27676 {
27677 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27678 output_asm_insn (templ, operands);
27679 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27680 output_asm_insn (templ, operands);
27681 }
27682 else
27683 {
27684 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27685 output_asm_insn (templ, operands);
27686 }
27687 return "";
27688 }
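/* For instance, a DImode shift by 40 is emitted by the function above as
   a shift of the source by #32 followed by a further shift of the result
   by #8.  */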
27689
27690 /* Output assembly for a WMMX tinsr instruction. */
27691 const char *
27692 arm_output_iwmmxt_tinsr (rtx *operands)
27693 {
27694 int mask = INTVAL (operands[3]);
27695 int i;
27696 char templ[50];
27697 int units = mode_nunits[GET_MODE (operands[0])];
27698 gcc_assert ((mask & (mask - 1)) == 0);
27699 for (i = 0; i < units; ++i)
27700 {
27701 if ((mask & 0x01) == 1)
27702 {
27703 break;
27704 }
27705 mask >>= 1;
27706 }
27707 gcc_assert (i < units);
27708 {
27709 switch (GET_MODE (operands[0]))
27710 {
27711 case E_V8QImode:
27712 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27713 break;
27714 case E_V4HImode:
27715 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27716 break;
27717 case E_V2SImode:
27718 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27719 break;
27720 default:
27721 gcc_unreachable ();
27722 break;
27723 }
27724 output_asm_insn (templ, operands);
27725 }
27726 return "";
27727 }
27728
27729 /* Output a Thumb-1 casesi dispatch sequence. */
27730 const char *
27731 thumb1_output_casesi (rtx *operands)
27732 {
27733 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27734
27735 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27736
27737 switch (GET_MODE(diff_vec))
27738 {
27739 case E_QImode:
27740 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27741 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27742 case E_HImode:
27743 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27744 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27745 case E_SImode:
27746 return "bl\t%___gnu_thumb1_case_si";
27747 default:
27748 gcc_unreachable ();
27749 }
27750 }
27751
27752 /* Output a Thumb-2 casesi instruction. */
27753 const char *
27754 thumb2_output_casesi (rtx *operands)
27755 {
27756 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27757
27758 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27759
27760 output_asm_insn ("cmp\t%0, %1", operands);
27761 output_asm_insn ("bhi\t%l3", operands);
27762 switch (GET_MODE(diff_vec))
27763 {
27764 case E_QImode:
27765 return "tbb\t[%|pc, %0]";
27766 case E_HImode:
27767 return "tbh\t[%|pc, %0, lsl #1]";
27768 case E_SImode:
27769 if (flag_pic)
27770 {
27771 output_asm_insn ("adr\t%4, %l2", operands);
27772 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27773 output_asm_insn ("add\t%4, %4, %5", operands);
27774 return "bx\t%4";
27775 }
27776 else
27777 {
27778 output_asm_insn ("adr\t%4, %l2", operands);
27779 return "ldr\t%|pc, [%4, %0, lsl #2]";
27780 }
27781 default:
27782 gcc_unreachable ();
27783 }
27784 }
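/* For illustration (register numbers depend on the actual operands), a
   QImode dispatch therefore expands to a sequence along the lines of:

        cmp     r0, r1
        bhi     .Ldefault
        tbb     [pc, r0]

   while the SImode non-PIC form loads the branch target directly:

        cmp     r0, r1
        bhi     .Ldefault
        adr     r4, .Ltable
        ldr     pc, [r4, r0, lsl #2]  */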
27785
27786 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27787 per-core tuning structs. */
27788 static int
27789 arm_issue_rate (void)
27790 {
27791 return current_tune->issue_rate;
27792 }
27793
27794 /* Return how many instructions the scheduler should look ahead to choose
27795 the best one. */
27796 static int
27797 arm_first_cycle_multipass_dfa_lookahead (void)
27798 {
27799 int issue_rate = arm_issue_rate ();
27800
27801 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27802 }
27803
27804 /* Enable modeling of L2 auto-prefetcher. */
27805 static int
27806 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27807 {
27808 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27809 }
27810
27811 const char *
27812 arm_mangle_type (const_tree type)
27813 {
27814 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27815 has to be mangled as if it is in the "std" namespace. */
27816 if (TARGET_AAPCS_BASED
27817 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27818 return "St9__va_list";
27819
27820 /* Half-precision float. */
27821 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27822 return "Dh";
27823
27824 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27825 builtin type. */
27826 if (TYPE_NAME (type) != NULL)
27827 return arm_mangle_builtin_type (type);
27828
27829 /* Use the default mangling. */
27830 return NULL;
27831 }
27832
27833 /* Order of allocation of core registers for Thumb: this allocation is
27834 written over the corresponding initial entries of the array
27835 initialized with REG_ALLOC_ORDER. We allocate all low registers
27836 first. Saving and restoring a low register is usually cheaper than
27837 using a call-clobbered high register. */
27838
27839 static const int thumb_core_reg_alloc_order[] =
27840 {
27841 3, 2, 1, 0, 4, 5, 6, 7,
27842 12, 14, 8, 9, 10, 11
27843 };
27844
27845 /* Adjust register allocation order when compiling for Thumb. */
27846
27847 void
27848 arm_order_regs_for_local_alloc (void)
27849 {
27850 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27851 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27852 if (TARGET_THUMB)
27853 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27854 sizeof (thumb_core_reg_alloc_order));
27855 }
27856
27857 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27858
27859 bool
27860 arm_frame_pointer_required (void)
27861 {
27862 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27863 return true;
27864
27865 /* If the function receives nonlocal gotos, it needs to save the frame
27866 pointer in the nonlocal_goto_save_area object. */
27867 if (cfun->has_nonlocal_label)
27868 return true;
27869
27870 /* The frame pointer is required for non-leaf APCS frames. */
27871 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27872 return true;
27873
27874 /* If we are probing the stack in the prologue, we will have a faulting
27875 instruction prior to the stack adjustment and this requires a frame
27876 pointer if we want to catch the exception using the EABI unwinder. */
27877 if (!IS_INTERRUPT (arm_current_func_type ())
27878 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27879 || flag_stack_clash_protection)
27880 && arm_except_unwind_info (&global_options) == UI_TARGET
27881 && cfun->can_throw_non_call_exceptions)
27882 {
27883 HOST_WIDE_INT size = get_frame_size ();
27884
27885 /* That's irrelevant if there is no stack adjustment. */
27886 if (size <= 0)
27887 return false;
27888
27889 /* That's relevant only if there is a stack probe. */
27890 if (crtl->is_leaf && !cfun->calls_alloca)
27891 {
27892 /* We don't have the final size of the frame so adjust. */
27893 size += 32 * UNITS_PER_WORD;
27894 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27895 return true;
27896 }
27897 else
27898 return true;
27899 }
27900
27901 return false;
27902 }
27903
27904 /* Only Thumb-1 lacks support for conditional execution, so return true
27905 if the target is not Thumb-1. */
27906 static bool
27907 arm_have_conditional_execution (void)
27908 {
27909 return !TARGET_THUMB1;
27910 }
27911
27912 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27913 static HOST_WIDE_INT
27914 arm_vector_alignment (const_tree type)
27915 {
27916 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27917
27918 if (TARGET_AAPCS_BASED)
27919 align = MIN (align, 64);
27920
27921 return align;
27922 }
27923
27924 static unsigned int
27925 arm_autovectorize_vector_sizes (void)
27926 {
27927 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27928 }
27929
27930 static bool
27931 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27932 {
27933 /* Vectors which aren't in packed structures will not be less aligned than
27934 the natural alignment of their element type, so this is safe. */
27935 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27936 return !is_packed;
27937
27938 return default_builtin_vector_alignment_reachable (type, is_packed);
27939 }
27940
27941 static bool
27942 arm_builtin_support_vector_misalignment (machine_mode mode,
27943 const_tree type, int misalignment,
27944 bool is_packed)
27945 {
27946 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27947 {
27948 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27949
27950 if (is_packed)
27951 return align == 1;
27952
27953 /* If the misalignment is unknown, we should be able to handle the access
27954 so long as it is not to a member of a packed data structure. */
27955 if (misalignment == -1)
27956 return true;
27957
27958 /* Return true if the misalignment is a multiple of the natural alignment
27959 of the vector's element type. This is probably always going to be
27960 true in practice, since we've already established that this isn't a
27961 packed access. */
27962 return ((misalignment % align) == 0);
27963 }
27964
27965 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27966 is_packed);
27967 }
27968
27969 static void
27970 arm_conditional_register_usage (void)
27971 {
27972 int regno;
27973
27974 if (TARGET_THUMB1 && optimize_size)
27975 {
27976 /* When optimizing for size on Thumb-1, it's better not
27977 to use the HI regs, because of the overhead of
27978 stacking them. */
27979 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27980 fixed_regs[regno] = call_used_regs[regno] = 1;
27981 }
27982
27983 /* The link register can be clobbered by any branch insn,
27984 but we have no way to track that at present, so mark
27985 it as unavailable. */
27986 if (TARGET_THUMB1)
27987 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27988
27989 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27990 {
27991 /* VFPv3 registers are disabled when earlier VFP
27992 versions are selected due to the definition of
27993 LAST_VFP_REGNUM. */
27994 for (regno = FIRST_VFP_REGNUM;
27995 regno <= LAST_VFP_REGNUM; ++ regno)
27996 {
27997 fixed_regs[regno] = 0;
27998 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27999 || regno >= FIRST_VFP_REGNUM + 32;
28000 }
28001 }
28002
28003 if (TARGET_REALLY_IWMMXT)
28004 {
28005 regno = FIRST_IWMMXT_GR_REGNUM;
28006 /* The 2002/10/09 revision of the XScale ABI has wCG0
28007 and wCG1 as call-preserved registers. The 2002/11/21
28008 revision changed this so that all wCG registers are
28009 scratch registers. */
28010 for (regno = FIRST_IWMMXT_GR_REGNUM;
28011 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28012 fixed_regs[regno] = 0;
28013 /* The XScale ABI has wR0 - wR9 as scratch registers,
28014 the rest as call-preserved registers. */
28015 for (regno = FIRST_IWMMXT_REGNUM;
28016 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28017 {
28018 fixed_regs[regno] = 0;
28019 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28020 }
28021 }
28022
28023 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28024 {
28025 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28026 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28027 }
28028 else if (TARGET_APCS_STACK)
28029 {
28030 fixed_regs[10] = 1;
28031 call_used_regs[10] = 1;
28032 }
28033 /* -mcaller-super-interworking reserves r11 for calls to
28034 _interwork_r11_call_via_rN(). Making the register global
28035 is an easy way of ensuring that it remains valid for all
28036 calls. */
28037 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28038 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28039 {
28040 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28041 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28042 if (TARGET_CALLER_INTERWORKING)
28043 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28044 }
28045 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28046 }
28047
28048 static reg_class_t
28049 arm_preferred_rename_class (reg_class_t rclass)
28050 {
28051 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28052 using GENERAL_REGS. During the register rename pass we therefore prefer
28053 LO_REGS, so that code size can be reduced. */
28054 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28055 return LO_REGS;
28056 else
28057 return NO_REGS;
28058 }
28059
28060 /* Compute the attribute "length" of insn "*push_multi".
28061 So this function MUST be kept in sync with that insn pattern. */
28062 int
28063 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28064 {
28065 int i, regno, hi_reg;
28066 int num_saves = XVECLEN (parallel_op, 0);
28067
28068 /* ARM mode. */
28069 if (TARGET_ARM)
28070 return 4;
28071 /* Thumb1 mode. */
28072 if (TARGET_THUMB1)
28073 return 2;
28074
28075 /* Thumb2 mode. */
28076 regno = REGNO (first_op);
28077 /* For PUSH/STM in Thumb-2 mode, we can use the 16-bit encoding if the
28078 register list fits in 8 bits. Normally this means all registers in the
28079 list must be LO_REGS, that is (R0-R7). If any HI_REGS register is used,
28080 then we must use the 32-bit encoding. The one exception is PUSH, where
28081 LR (in HI_REGS) can still be used with the 16-bit encoding. */
28082 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28083 for (i = 1; i < num_saves && !hi_reg; i++)
28084 {
28085 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28086 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28087 }
28088
28089 if (!hi_reg)
28090 return 2;
28091 return 4;
28092 }
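/* For example, under Thumb-2 "push {r0-r7, lr}" can use the 16-bit
   encoding (length 2), whereas "push {r4, r8}" requires the 32-bit
   encoding (length 4) because r8 is a high register.  */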
28093
28094 /* Compute the attribute "length" of insn. Currently, this function is used
28095 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28096 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28097 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28098 true if OPERANDS contains an insn that explicitly updates the base register. */
28099
28100 int
28101 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28102 {
28103 /* ARM mode. */
28104 if (TARGET_ARM)
28105 return 4;
28106 /* Thumb1 mode. */
28107 if (TARGET_THUMB1)
28108 return 2;
28109
28110 rtx parallel_op = operands[0];
28111 /* Initialize to the index of the last element of the PARALLEL. */
28112 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28113 /* Initialize to the base register number. */
28114 unsigned regno = REGNO (operands[1]);
28115 /* Skip return and write back pattern.
28116 We only need register pop pattern for later analysis. */
28117 unsigned first_indx = 0;
28118 first_indx += return_pc ? 1 : 0;
28119 first_indx += write_back_p ? 1 : 0;
28120
28121 /* A pop operation can be done through LDM or POP. If the base register is
28122 SP and write back is used, then LDM is an alias of POP. */
28123 bool pop_p = (regno == SP_REGNUM && write_back_p);
28124 bool ldm_p = !pop_p;
28125
28126 /* Check base register for LDM. */
28127 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28128 return 4;
28129
28130 /* Check each register in the list. */
28131 for (; indx >= first_indx; indx--)
28132 {
28133 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28134 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28135 comment in arm_attr_length_push_multi. */
28136 if (REGNO_REG_CLASS (regno) == HI_REGS
28137 && (regno != PC_REGNUM || ldm_p))
28138 return 4;
28139 }
28140
28141 return 2;
28142 }
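/* For example, "pop {r4-r7, pc}" (SP base with write back) keeps the
   16-bit encoding, whereas an LDM with a high base register such as
   "ldmia r8!, {r0-r3}" needs the 32-bit encoding.  */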
28143
28144 /* Compute the number of instructions emitted by output_move_double. */
28145 int
28146 arm_count_output_move_double_insns (rtx *operands)
28147 {
28148 int count;
28149 rtx ops[2];
28150 /* output_move_double may modify the operands array, so call it
28151 here on a copy of the array. */
28152 ops[0] = operands[0];
28153 ops[1] = operands[1];
28154 output_move_double (ops, false, &count);
28155 return count;
28156 }
28157
28158 int
28159 vfp3_const_double_for_fract_bits (rtx operand)
28160 {
28161 REAL_VALUE_TYPE r0;
28162
28163 if (!CONST_DOUBLE_P (operand))
28164 return 0;
28165
28166 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28167 if (exact_real_inverse (DFmode, &r0)
28168 && !REAL_VALUE_NEGATIVE (r0))
28169 {
28170 if (exact_real_truncate (DFmode, &r0))
28171 {
28172 HOST_WIDE_INT value = real_to_integer (&r0);
28173 value = value & 0xffffffff;
28174 if ((value != 0) && ( (value & (value - 1)) == 0))
28175 {
28176 int ret = exact_log2 (value);
28177 gcc_assert (IN_RANGE (ret, 0, 31));
28178 return ret;
28179 }
28180 }
28181 }
28182 return 0;
28183 }
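/* For example, the constant 0.125 has the exact power-of-two inverse 8.0,
   so the function above returns 3; a constant such as 3.0 has no exact
   power-of-two inverse and yields 0.  */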
28184
28185 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28186 log2 is in [1, 32], return that log2. Otherwise return -1.
28187 This is used in the patterns for vcvt.s32.f32 floating-point to
28188 fixed-point conversions. */
28189
28190 int
28191 vfp3_const_double_for_bits (rtx x)
28192 {
28193 const REAL_VALUE_TYPE *r;
28194
28195 if (!CONST_DOUBLE_P (x))
28196 return -1;
28197
28198 r = CONST_DOUBLE_REAL_VALUE (x);
28199
28200 if (REAL_VALUE_NEGATIVE (*r)
28201 || REAL_VALUE_ISNAN (*r)
28202 || REAL_VALUE_ISINF (*r)
28203 || !real_isinteger (r, SFmode))
28204 return -1;
28205
28206 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28207
28208 /* The exact_log2 above will have returned -1 if this is
28209 not an exact log2. */
28210 if (!IN_RANGE (hwint, 1, 32))
28211 return -1;
28212
28213 return hwint;
28214 }
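/* For example, a CONST_DOUBLE of 8.0 returns 3 and 2.0 returns 1, while
   1.0 (log2 of 0), 3.0 (not a power of two) and any negative value all
   return -1.  */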
28215
28216 \f
28217 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28218
28219 static void
28220 arm_pre_atomic_barrier (enum memmodel model)
28221 {
28222 if (need_atomic_barrier_p (model, true))
28223 emit_insn (gen_memory_barrier ());
28224 }
28225
28226 static void
28227 arm_post_atomic_barrier (enum memmodel model)
28228 {
28229 if (need_atomic_barrier_p (model, false))
28230 emit_insn (gen_memory_barrier ());
28231 }
28232
28233 /* Emit the load-exclusive and store-exclusive instructions.
28234 Use acquire and release versions if necessary. */
28235
28236 static void
28237 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28238 {
28239 rtx (*gen) (rtx, rtx);
28240
28241 if (acq)
28242 {
28243 switch (mode)
28244 {
28245 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28246 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28247 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28248 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28249 default:
28250 gcc_unreachable ();
28251 }
28252 }
28253 else
28254 {
28255 switch (mode)
28256 {
28257 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28258 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28259 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28260 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28261 default:
28262 gcc_unreachable ();
28263 }
28264 }
28265
28266 emit_insn (gen (rval, mem));
28267 }
28268
28269 static void
28270 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28271 rtx mem, bool rel)
28272 {
28273 rtx (*gen) (rtx, rtx, rtx);
28274
28275 if (rel)
28276 {
28277 switch (mode)
28278 {
28279 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28280 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28281 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28282 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28283 default:
28284 gcc_unreachable ();
28285 }
28286 }
28287 else
28288 {
28289 switch (mode)
28290 {
28291 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28292 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28293 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28294 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28295 default:
28296 gcc_unreachable ();
28297 }
28298 }
28299
28300 emit_insn (gen (bval, rval, mem));
28301 }
28302
28303 /* Mark the previous jump instruction as unlikely. */
28304
28305 static void
28306 emit_unlikely_jump (rtx insn)
28307 {
28308 rtx_insn *jump = emit_jump_insn (insn);
28309 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28310 }
28311
28312 /* Expand a compare and swap pattern. */
28313
28314 void
28315 arm_expand_compare_and_swap (rtx operands[])
28316 {
28317 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28318 machine_mode mode;
28319 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28320
28321 bval = operands[0];
28322 rval = operands[1];
28323 mem = operands[2];
28324 oldval = operands[3];
28325 newval = operands[4];
28326 is_weak = operands[5];
28327 mod_s = operands[6];
28328 mod_f = operands[7];
28329 mode = GET_MODE (mem);
28330
28331 /* Normally the succ memory model must be stronger than fail, but in the
28332 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28333 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28334
28335 if (TARGET_HAVE_LDACQ
28336 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28337 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28338 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28339
28340 switch (mode)
28341 {
28342 case E_QImode:
28343 case E_HImode:
28344 /* For narrow modes, we're going to perform the comparison in SImode,
28345 so do the zero-extension now. */
28346 rval = gen_reg_rtx (SImode);
28347 oldval = convert_modes (SImode, mode, oldval, true);
28348 /* FALLTHRU */
28349
28350 case E_SImode:
28351 /* Force the value into a register if needed. We waited until after
28352 the zero-extension above to do this properly. */
28353 if (!arm_add_operand (oldval, SImode))
28354 oldval = force_reg (SImode, oldval);
28355 break;
28356
28357 case E_DImode:
28358 if (!cmpdi_operand (oldval, mode))
28359 oldval = force_reg (mode, oldval);
28360 break;
28361
28362 default:
28363 gcc_unreachable ();
28364 }
28365
28366 if (TARGET_THUMB1)
28367 {
28368 switch (mode)
28369 {
28370 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28371 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28372 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28373 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28374 default:
28375 gcc_unreachable ();
28376 }
28377 }
28378 else
28379 {
28380 switch (mode)
28381 {
28382 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28383 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28384 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28385 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28386 default:
28387 gcc_unreachable ();
28388 }
28389 }
28390
28391 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28392 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28393
28394 if (mode == QImode || mode == HImode)
28395 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28396
28397 /* In all cases, we arrange for success to be signaled by Z set.
28398 This arrangement allows for the boolean result to be used directly
28399 in a subsequent branch, post optimization. For Thumb-1 targets, the
28400 boolean negation of the result is also stored in bval because Thumb-1
28401 backend lacks dependency tracking for CC flag due to flag-setting not
28402 being represented at RTL level. */
28403 if (TARGET_THUMB1)
28404 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28405 else
28406 {
28407 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28408 emit_insn (gen_rtx_SET (bval, x));
28409 }
28410 }
28411
28412 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28413 another memory store between the load-exclusive and store-exclusive can
28414 reset the monitor from Exclusive to Open state. This means we must wait
28415 until after reload to split the pattern, lest we get a register spill in
28416 the middle of the atomic sequence. Success of the compare and swap is
28417 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28418 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28419 by the atomic_compare_and_swapmode standard pattern in operand 0). */
28420
28421 void
28422 arm_split_compare_and_swap (rtx operands[])
28423 {
28424 rtx rval, mem, oldval, newval, neg_bval;
28425 machine_mode mode;
28426 enum memmodel mod_s, mod_f;
28427 bool is_weak;
28428 rtx_code_label *label1, *label2;
28429 rtx x, cond;
28430
28431 rval = operands[1];
28432 mem = operands[2];
28433 oldval = operands[3];
28434 newval = operands[4];
28435 is_weak = (operands[5] != const0_rtx);
28436 mod_s = memmodel_from_int (INTVAL (operands[6]));
28437 mod_f = memmodel_from_int (INTVAL (operands[7]));
28438 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28439 mode = GET_MODE (mem);
28440
28441 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28442
28443 bool use_acquire = TARGET_HAVE_LDACQ
28444 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28445 || is_mm_release (mod_s));
28446
28447 bool use_release = TARGET_HAVE_LDACQ
28448 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28449 || is_mm_acquire (mod_s));
28450
28451 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28452 a full barrier is emitted after the store-release. */
28453 if (is_armv8_sync)
28454 use_acquire = false;
28455
28456 /* Checks whether a barrier is needed and emits one accordingly. */
28457 if (!(use_acquire || use_release))
28458 arm_pre_atomic_barrier (mod_s);
28459
28460 label1 = NULL;
28461 if (!is_weak)
28462 {
28463 label1 = gen_label_rtx ();
28464 emit_label (label1);
28465 }
28466 label2 = gen_label_rtx ();
28467
28468 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28469
28470 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28471 as required to communicate with arm_expand_compare_and_swap. */
28472 if (TARGET_32BIT)
28473 {
28474 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28475 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28476 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28477 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28478 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28479 }
28480 else
28481 {
28482 emit_move_insn (neg_bval, const1_rtx);
28483 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28484 if (thumb1_cmpneg_operand (oldval, SImode))
28485 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28486 label2, cond));
28487 else
28488 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28489 }
28490
28491 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28492
28493 /* Weak or strong, we want EQ to be true for success, so that we
28494 match the flags that we got from the compare above. */
28495 if (TARGET_32BIT)
28496 {
28497 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28498 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28499 emit_insn (gen_rtx_SET (cond, x));
28500 }
28501
28502 if (!is_weak)
28503 {
28504 /* Z is set to boolean value of !neg_bval, as required to communicate
28505 with arm_expand_compare_and_swap. */
28506 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28507 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28508 }
28509
28510 if (!is_mm_relaxed (mod_f))
28511 emit_label (label2);
28512
28513 /* Checks whether a barrier is needed and emits one accordingly. */
28514 if (is_armv8_sync
28515 || !(use_acquire || use_release))
28516 arm_post_atomic_barrier (mod_s);
28517
28518 if (is_mm_relaxed (mod_f))
28519 emit_label (label2);
28520 }
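/* For a strong compare and swap on a 32-bit target, the split above
   produces, roughly:

        .Lretry:
            ldrex   rval, [mem]
            cmp     rval, oldval
            bne     .Ldone
            strex   neg_bval, newval, [mem]
            cmp     neg_bval, #0
            bne     .Lretry
        .Ldone:

   with acquire/release variants of the exclusive accesses or explicit
   barriers added according to the memory model checks above.  */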
28521
28522 /* Split an atomic operation pattern. Operation is given by CODE and is one
28523 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28524 operation). Operation is performed on the content at MEM and on VALUE
28525 following the memory model MODEL_RTX. The content at MEM before and after
28526 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28527 success of the operation is returned in COND. Using a scratch register or
28528 an operand register for these determines what result is returned for that
28529 pattern. */
28530
28531 void
28532 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28533 rtx value, rtx model_rtx, rtx cond)
28534 {
28535 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28536 machine_mode mode = GET_MODE (mem);
28537 machine_mode wmode = (mode == DImode ? DImode : SImode);
28538 rtx_code_label *label;
28539 bool all_low_regs, bind_old_new;
28540 rtx x;
28541
28542 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28543
28544 bool use_acquire = TARGET_HAVE_LDACQ
28545 && !(is_mm_relaxed (model) || is_mm_consume (model)
28546 || is_mm_release (model));
28547
28548 bool use_release = TARGET_HAVE_LDACQ
28549 && !(is_mm_relaxed (model) || is_mm_consume (model)
28550 || is_mm_acquire (model));
28551
28552 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28553 a full barrier is emitted after the store-release. */
28554 if (is_armv8_sync)
28555 use_acquire = false;
28556
28557 /* Checks whether a barrier is needed and emits one accordingly. */
28558 if (!(use_acquire || use_release))
28559 arm_pre_atomic_barrier (model);
28560
28561 label = gen_label_rtx ();
28562 emit_label (label);
28563
28564 if (new_out)
28565 new_out = gen_lowpart (wmode, new_out);
28566 if (old_out)
28567 old_out = gen_lowpart (wmode, old_out);
28568 else
28569 old_out = new_out;
28570 value = simplify_gen_subreg (wmode, value, mode, 0);
28571
28572 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28573
28574 /* Does the operation require the destination and first operand to use the
28575 same register? This is decided by the register constraints of the
28576 relevant insn patterns in thumb1.md. */
28577 gcc_assert (!new_out || REG_P (new_out));
28578 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28579 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28580 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28581 bind_old_new =
28582 (TARGET_THUMB1
28583 && code != SET
28584 && code != MINUS
28585 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28586
28587 /* We want to return the old value while putting the result of the operation
28588 in the same register as the old value so copy the old value over to the
28589 destination register and use that register for the operation. */
28590 if (old_out && bind_old_new)
28591 {
28592 emit_move_insn (new_out, old_out);
28593 old_out = new_out;
28594 }
28595
28596 switch (code)
28597 {
28598 case SET:
28599 new_out = value;
28600 break;
28601
28602 case NOT:
28603 x = gen_rtx_AND (wmode, old_out, value);
28604 emit_insn (gen_rtx_SET (new_out, x));
28605 x = gen_rtx_NOT (wmode, new_out);
28606 emit_insn (gen_rtx_SET (new_out, x));
28607 break;
28608
28609 case MINUS:
28610 if (CONST_INT_P (value))
28611 {
28612 value = GEN_INT (-INTVAL (value));
28613 code = PLUS;
28614 }
28615 /* FALLTHRU */
28616
28617 case PLUS:
28618 if (mode == DImode)
28619 {
28620 /* DImode plus/minus need to clobber flags. */
28621 /* The adddi3 and subdi3 patterns are incorrectly written so that
28622 they require matching operands, even when we could easily support
28623 three operands. Thankfully, this can be fixed up post-splitting,
28624 as the individual add+adc patterns do accept three operands and
28625 post-reload cprop can make these moves go away. */
28626 emit_move_insn (new_out, old_out);
28627 if (code == PLUS)
28628 x = gen_adddi3 (new_out, new_out, value);
28629 else
28630 x = gen_subdi3 (new_out, new_out, value);
28631 emit_insn (x);
28632 break;
28633 }
28634 /* FALLTHRU */
28635
28636 default:
28637 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28638 emit_insn (gen_rtx_SET (new_out, x));
28639 break;
28640 }
28641
28642 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28643 use_release);
28644
28645 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28646 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28647
28648 /* Checks whether a barrier is needed and emits one accordingly. */
28649 if (is_armv8_sync
28650 || !(use_acquire || use_release))
28651 arm_post_atomic_barrier (model);
28652 }
28653 \f
28654 #define MAX_VECT_LEN 16
28655
28656 struct expand_vec_perm_d
28657 {
28658 rtx target, op0, op1;
28659 auto_vec_perm_indices perm;
28660 machine_mode vmode;
28661 bool one_vector_p;
28662 bool testing_p;
28663 };
28664
28665 /* Generate a variable permutation. */
28666
28667 static void
28668 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28669 {
28670 machine_mode vmode = GET_MODE (target);
28671 bool one_vector_p = rtx_equal_p (op0, op1);
28672
28673 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28674 gcc_checking_assert (GET_MODE (op0) == vmode);
28675 gcc_checking_assert (GET_MODE (op1) == vmode);
28676 gcc_checking_assert (GET_MODE (sel) == vmode);
28677 gcc_checking_assert (TARGET_NEON);
28678
28679 if (one_vector_p)
28680 {
28681 if (vmode == V8QImode)
28682 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28683 else
28684 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28685 }
28686 else
28687 {
28688 rtx pair;
28689
28690 if (vmode == V8QImode)
28691 {
28692 pair = gen_reg_rtx (V16QImode);
28693 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28694 pair = gen_lowpart (TImode, pair);
28695 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28696 }
28697 else
28698 {
28699 pair = gen_reg_rtx (OImode);
28700 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28701 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28702 }
28703 }
28704 }
28705
28706 void
28707 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28708 {
28709 machine_mode vmode = GET_MODE (target);
28710 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28711 bool one_vector_p = rtx_equal_p (op0, op1);
28712 rtx rmask[MAX_VECT_LEN], mask;
28713
28714 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28715 numbering of elements for big-endian, we must reverse the order. */
28716 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28717
28718 /* The VTBL instruction does not use a modulo index, so we must take care
28719 of that ourselves. */
28720 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28721 for (i = 0; i < nelt; ++i)
28722 rmask[i] = mask;
28723 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28724 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28725
28726 arm_expand_vec_perm_1 (target, op0, op1, sel);
28727 }
28728
28729 /* Map lane ordering between architectural lane order, and GCC lane order,
28730 taking into account ABI. See comment above output_move_neon for details. */
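/* For example, for V2SImode on big-endian, lanes 0 and 1 simply swap; for a
   128-bit mode such as V4SImode the extra XOR with nelems / 2 yields the map
   0 <-> 1, 2 <-> 3, i.e. lanes reverse within each D register while the two
   D registers keep their ABI order.  */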
28731
28732 static int
28733 neon_endian_lane_map (machine_mode mode, int lane)
28734 {
28735 if (BYTES_BIG_ENDIAN)
28736 {
28737 int nelems = GET_MODE_NUNITS (mode);
28738 /* Reverse lane order. */
28739 lane = (nelems - 1 - lane);
28740 /* Reverse D register order, to match ABI. */
28741 if (GET_MODE_SIZE (mode) == 16)
28742 lane = lane ^ (nelems / 2);
28743 }
28744 return lane;
28745 }
28746
28747 /* Some permutations index into pairs of vectors; this is a helper function
28748 to map indexes into those pairs of vectors. */
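/* For example, on big-endian V2SImode the pair indexes map 0 <-> 1 and
   2 <-> 3, reversing the lanes within each vector of the pair.  */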
28749
28750 static int
28751 neon_pair_endian_lane_map (machine_mode mode, int lane)
28752 {
28753 int nelem = GET_MODE_NUNITS (mode);
28754 if (BYTES_BIG_ENDIAN)
28755 lane =
28756 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28757 return lane;
28758 }
28759
28760 /* Generate or test for an insn that supports a constant permutation. */
28761
28762 /* Recognize patterns for the VUZP insns. */
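/* For example (little-endian), on V4SImode the even-lane selector is
   { 0, 2, 4, 6 } and the odd-lane selector is { 1, 3, 5, 7 }.  */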
28763
28764 static bool
28765 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28766 {
28767 unsigned int i, odd, mask, nelt = d->perm.length ();
28768 rtx out0, out1, in0, in1;
28769 rtx (*gen)(rtx, rtx, rtx, rtx);
28770 int first_elem;
28771 int swap_nelt;
28772
28773 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28774 return false;
28775
28776 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28777 big-endian pattern on 64-bit vectors, so we correct for that. */
28778 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28779 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28780
28781 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28782
28783 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28784 odd = 0;
28785 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28786 odd = 1;
28787 else
28788 return false;
28789 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28790
28791 for (i = 0; i < nelt; i++)
28792 {
28793 unsigned elt =
28794 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28795 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28796 return false;
28797 }
28798
28799 /* Success! */
28800 if (d->testing_p)
28801 return true;
28802
28803 switch (d->vmode)
28804 {
28805 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28806 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28807 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28808 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28809 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28810 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28811 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28812 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28813 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28814 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28815 default:
28816 gcc_unreachable ();
28817 }
28818
28819 in0 = d->op0;
28820 in1 = d->op1;
28821 if (swap_nelt != 0)
28822 std::swap (in0, in1);
28823
28824 out0 = d->target;
28825 out1 = gen_reg_rtx (d->vmode);
28826 if (odd)
28827 std::swap (out0, out1);
28828
28829 emit_insn (gen (out0, in0, in1, out1));
28830 return true;
28831 }
28832
28833 /* Recognize patterns for the VZIP insns. */
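/* For example (little-endian), on V4SImode the low-half selector is
   { 0, 4, 1, 5 } and the high-half selector is { 2, 6, 3, 7 }.  */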
28834
28835 static bool
28836 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28837 {
28838 unsigned int i, high, mask, nelt = d->perm.length ();
28839 rtx out0, out1, in0, in1;
28840 rtx (*gen)(rtx, rtx, rtx, rtx);
28841 int first_elem;
28842 bool is_swapped;
28843
28844 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28845 return false;
28846
28847 is_swapped = BYTES_BIG_ENDIAN;
28848
28849 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28850
28851 high = nelt / 2;
28852 if (first_elem == neon_endian_lane_map (d->vmode, high))
28853 ;
28854 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28855 high = 0;
28856 else
28857 return false;
28858 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28859
28860 for (i = 0; i < nelt / 2; i++)
28861 {
28862 unsigned elt =
28863 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28864 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28865 != elt)
28866 return false;
28867 elt =
28868 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28869 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28870 != elt)
28871 return false;
28872 }
28873
28874 /* Success! */
28875 if (d->testing_p)
28876 return true;
28877
28878 switch (d->vmode)
28879 {
28880 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28881 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28882 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28883 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28884 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28885 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28886 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28887 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28888 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28889 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28890 default:
28891 gcc_unreachable ();
28892 }
28893
28894 in0 = d->op0;
28895 in1 = d->op1;
28896 if (is_swapped)
28897 std::swap (in0, in1);
28898
28899 out0 = d->target;
28900 out1 = gen_reg_rtx (d->vmode);
28901 if (high)
28902 std::swap (out0, out1);
28903
28904 emit_insn (gen (out0, in0, in1, out1));
28905 return true;
28906 }
28907
28908 /* Recognize patterns for the VREV insns. */
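/* For example, diff == 1 on V4SImode matches { 1, 0, 3, 2 } and maps to
   VREV64, reversing the two elements within each 64-bit chunk.  */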
28909
28910 static bool
28911 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28912 {
28913 unsigned int i, j, diff, nelt = d->perm.length ();
28914 rtx (*gen)(rtx, rtx);
28915
28916 if (!d->one_vector_p)
28917 return false;
28918
28919 diff = d->perm[0];
28920 switch (diff)
28921 {
28922 case 7:
28923 switch (d->vmode)
28924 {
28925 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28926 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28927 default:
28928 return false;
28929 }
28930 break;
28931 case 3:
28932 switch (d->vmode)
28933 {
28934 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28935 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28936 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28937 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28938 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28939 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28940 default:
28941 return false;
28942 }
28943 break;
28944 case 1:
28945 switch (d->vmode)
28946 {
28947 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28948 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28949 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28950 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28951 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28952 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28953 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28954 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28955 default:
28956 return false;
28957 }
28958 break;
28959 default:
28960 return false;
28961 }
28962
28963 for (i = 0; i < nelt ; i += diff + 1)
28964 for (j = 0; j <= diff; j += 1)
28965 {
28966 /* This is guaranteed to be true as the value of diff
28967 is 7, 3 or 1 and we should have enough elements in the
28968 queue to generate this. Getting a vector mask with a
28969 value of diff other than these values implies that
28970 something is wrong by the time we get here. */
28971 gcc_assert (i + j < nelt);
28972 if (d->perm[i + j] != i + diff - j)
28973 return false;
28974 }
28975
28976 /* Success! */
28977 if (d->testing_p)
28978 return true;
28979
28980 emit_insn (gen (d->target, d->op0));
28981 return true;
28982 }
28983
28984 /* Recognize patterns for the VTRN insns. */
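/* For example (little-endian), on V4SImode the even selector is
   { 0, 4, 2, 6 } and the odd selector is { 1, 5, 3, 7 }.  */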
28985
28986 static bool
28987 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28988 {
28989 unsigned int i, odd, mask, nelt = d->perm.length ();
28990 rtx out0, out1, in0, in1;
28991 rtx (*gen)(rtx, rtx, rtx, rtx);
28992
28993 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28994 return false;
28995
28996 /* Note that these are little-endian tests. Adjust for big-endian later. */
28997 if (d->perm[0] == 0)
28998 odd = 0;
28999 else if (d->perm[0] == 1)
29000 odd = 1;
29001 else
29002 return false;
29003 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29004
29005 for (i = 0; i < nelt; i += 2)
29006 {
29007 if (d->perm[i] != i + odd)
29008 return false;
29009 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29010 return false;
29011 }
29012
29013 /* Success! */
29014 if (d->testing_p)
29015 return true;
29016
29017 switch (d->vmode)
29018 {
29019 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29020 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29021 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29022 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29023 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29024 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29025 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29026 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29027 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29028 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29029 default:
29030 gcc_unreachable ();
29031 }
29032
29033 in0 = d->op0;
29034 in1 = d->op1;
29035 if (BYTES_BIG_ENDIAN)
29036 {
29037 std::swap (in0, in1);
29038 odd = !odd;
29039 }
29040
29041 out0 = d->target;
29042 out1 = gen_reg_rtx (d->vmode);
29043 if (odd)
29044 std::swap (out0, out1);
29045
29046 emit_insn (gen (out0, in0, in1, out1));
29047 return true;
29048 }
29049
29050 /* Recognize patterns for the VEXT insns. */
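/* For example, the selector { 1, 2, 3, 4 } on V4SImode matches VEXT with an
   offset of one element into the concatenation of the two inputs.  */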
29051
29052 static bool
29053 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29054 {
29055 unsigned int i, nelt = d->perm.length ();
29056 rtx (*gen) (rtx, rtx, rtx, rtx);
29057 rtx offset;
29058
29059 unsigned int location;
29060
29061 unsigned int next = d->perm[0] + 1;
29062
29063 /* TODO: Handle GCC's numbering of elements for big-endian. */
29064 if (BYTES_BIG_ENDIAN)
29065 return false;
29066
29067 /* Check if the extracted indexes are increasing by one. */
29068 for (i = 1; i < nelt; next++, i++)
29069 {
29070 /* If we hit the most significant element of the 2nd vector in
29071 the previous iteration, no need to test further. */
29072 if (next == 2 * nelt)
29073 return false;
29074
29075 /* If we are operating on only one vector, it could be a
29076 rotation. If there are only two elements of size < 64, let
29077 arm_evpc_neon_vrev catch it. */
29078 if (d->one_vector_p && (next == nelt))
29079 {
29080 if ((nelt == 2) && (d->vmode != V2DImode))
29081 return false;
29082 else
29083 next = 0;
29084 }
29085
29086 if (d->perm[i] != next)
29087 return false;
29088 }
29089
29090 location = d->perm[0];
29091
29092 switch (d->vmode)
29093 {
29094 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29095 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29096 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29097 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29098 case E_V2SImode: gen = gen_neon_vextv2si; break;
29099 case E_V4SImode: gen = gen_neon_vextv4si; break;
29100 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29101 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29102 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29103 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29104 case E_V2DImode: gen = gen_neon_vextv2di; break;
29105 default:
29106 return false;
29107 }
29108
29109 /* Success! */
29110 if (d->testing_p)
29111 return true;
29112
29113 offset = GEN_INT (location);
29114 emit_insn (gen (d->target, d->op0, d->op1, offset));
29115 return true;
29116 }
29117
29118 /* The NEON VTBL instruction is a fully variable permutation that's even
29119 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29120 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29121 can do slightly better by expanding this as a constant where we don't
29122 have to apply a mask. */
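/* For example, a two-operand V8QImode permutation has constant selector
   bytes already in the range [0, 15], so they can be loaded directly as the
   VTBL index vector without the AND emitted by arm_expand_vec_perm.  */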
29123
29124 static bool
29125 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29126 {
29127 rtx rperm[MAX_VECT_LEN], sel;
29128 machine_mode vmode = d->vmode;
29129 unsigned int i, nelt = d->perm.length ();
29130
29131 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29132 numbering of elements for big-endian, we must reverse the order. */
29133 if (BYTES_BIG_ENDIAN)
29134 return false;
29135
29136 if (d->testing_p)
29137 return true;
29138
29139 /* Generic code will try constant permutation twice: once with the
29140 original mode and again with the elements lowered to QImode.
29141 So wait and don't do the selector expansion ourselves. */
29142 if (vmode != V8QImode && vmode != V16QImode)
29143 return false;
29144
29145 for (i = 0; i < nelt; ++i)
29146 rperm[i] = GEN_INT (d->perm[i]);
29147 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29148 sel = force_reg (vmode, sel);
29149
29150 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29151 return true;
29152 }
29153
29154 static bool
29155 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29156 {
29157 /* Check if the input mask matches vext before reordering the
29158 operands. */
29159 if (TARGET_NEON)
29160 if (arm_evpc_neon_vext (d))
29161 return true;
29162
29163 /* The pattern matching functions above are written to look for a small
29164 number to begin the sequence (0, 1, N/2). If we begin with an index
29165 from the second operand, we can swap the operands. */
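  /* For example, with nelt == 4 the selector { 5, 1, 7, 3 } becomes
     { 1, 5, 3, 7 } once d->op0 and d->op1 are swapped below.  */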
29166 unsigned int nelt = d->perm.length ();
29167 if (d->perm[0] >= nelt)
29168 {
29169 for (unsigned int i = 0; i < nelt; ++i)
29170 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29171
29172 std::swap (d->op0, d->op1);
29173 }
29174
29175 if (TARGET_NEON)
29176 {
29177 if (arm_evpc_neon_vuzp (d))
29178 return true;
29179 if (arm_evpc_neon_vzip (d))
29180 return true;
29181 if (arm_evpc_neon_vrev (d))
29182 return true;
29183 if (arm_evpc_neon_vtrn (d))
29184 return true;
29185 return arm_evpc_neon_vtbl (d);
29186 }
29187 return false;
29188 }
29189
29190 /* Expand a vec_perm_const pattern. */
29191
29192 bool
29193 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29194 {
29195 struct expand_vec_perm_d d;
29196 int i, nelt, which;
29197
29198 d.target = target;
29199 d.op0 = op0;
29200 d.op1 = op1;
29201
29202 d.vmode = GET_MODE (target);
29203 gcc_assert (VECTOR_MODE_P (d.vmode));
29204 d.testing_p = false;
29205
29206 nelt = GET_MODE_NUNITS (d.vmode);
29207 d.perm.reserve (nelt);
29208 for (i = which = 0; i < nelt; ++i)
29209 {
29210 rtx e = XVECEXP (sel, 0, i);
29211 int ei = INTVAL (e) & (2 * nelt - 1);
29212 which |= (ei < nelt ? 1 : 2);
29213 d.perm.quick_push (ei);
29214 }
29215
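  /* WHICH is now a bitmask: bit 0 is set if any selector element indexes
     OP0, bit 1 if any indexes OP1.  */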
29216 switch (which)
29217 {
29218 default:
29219 gcc_unreachable();
29220
29221 case 3:
29222 d.one_vector_p = false;
29223 if (!rtx_equal_p (op0, op1))
29224 break;
29225
29226 /* The elements of PERM do not suggest that only the first operand
29227 is used, but both operands are identical. Allow easier matching
29228 of the permutation by folding the permutation into the single
29229 input vector. */
29230 /* FALLTHRU */
29231 case 2:
29232 for (i = 0; i < nelt; ++i)
29233 d.perm[i] &= nelt - 1;
29234 d.op0 = op1;
29235 d.one_vector_p = true;
29236 break;
29237
29238 case 1:
29239 d.op1 = op0;
29240 d.one_vector_p = true;
29241 break;
29242 }
29243
29244 return arm_expand_vec_perm_const_1 (&d);
29245 }
29246
29247 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29248
29249 static bool
29250 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29251 {
29252 struct expand_vec_perm_d d;
29253 unsigned int i, nelt, which;
29254 bool ret;
29255
29256 d.vmode = vmode;
29257 d.testing_p = true;
29258 d.perm.safe_splice (sel);
29259
29260 /* Categorize the set of elements in the selector. */
29261 nelt = GET_MODE_NUNITS (d.vmode);
29262 for (i = which = 0; i < nelt; ++i)
29263 {
29264 unsigned char e = d.perm[i];
29265 gcc_assert (e < 2 * nelt);
29266 which |= (e < nelt ? 1 : 2);
29267 }
29268
29269 /* If all elements come from the second vector, fold them to index the first. */
29270 if (which == 2)
29271 for (i = 0; i < nelt; ++i)
29272 d.perm[i] -= nelt;
29273
29274 /* Check whether the mask can be applied to the vector type. */
29275 d.one_vector_p = (which != 3);
29276
29277 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29278 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29279 if (!d.one_vector_p)
29280 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29281
29282 start_sequence ();
29283 ret = arm_expand_vec_perm_const_1 (&d);
29284 end_sequence ();
29285
29286 return ret;
29287 }
29288
29289 bool
29290 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29291 {
29292 /* If we are soft float and either have ldrd or the access fits in
29293 a single word, then all auto increment forms are ok. */
29294 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29295 return true;
29296
29297 switch (code)
29298 {
29299 /* Post increment is supported for all instruction forms; pre
29300 decrement is supported for all forms except vectors. */
29301 case ARM_POST_INC:
29302 case ARM_PRE_DEC:
29303 if (VECTOR_MODE_P (mode))
29304 {
29305 if (code != ARM_PRE_DEC)
29306 return true;
29307 else
29308 return false;
29309 }
29310
29311 return true;
29312
29313 case ARM_POST_DEC:
29314 case ARM_PRE_INC:
29315 /* Without LDRD, and with a mode size greater than the
29316 word size, there is no point in auto-incrementing
29317 because ldm and stm do not have these forms. */
29318 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29319 return false;
29320
29321 /* Vector and floating point modes do not support
29322 these auto increment forms. */
29323 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29324 return false;
29325
29326 return true;
29327
29328 default:
29329 return false;
29330
29331 }
29332
29333 return false;
29334 }
29335
29336 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29337 on ARM, since we know that shifts by negative amounts are no-ops.
29338 Additionally, the default expansion code is not available or suitable
29339 for post-reload insn splits (this can occur when the register allocator
29340 chooses not to do a shift in NEON).
29341
29342 This function is used in both initial expand and post-reload splits, and
29343 handles all kinds of 64-bit shifts.
29344
29345 Input requirements:
29346 - It is safe for the input and output to be the same register, but
29347 early-clobber rules apply for the shift amount and scratch registers.
29348 - Shift by register requires both scratch registers. In all other cases
29349 the scratch registers may be NULL.
29350 - Ashiftrt by a register also clobbers the CC register. */
29351 void
29352 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29353 rtx amount, rtx scratch1, rtx scratch2)
29354 {
29355 rtx out_high = gen_highpart (SImode, out);
29356 rtx out_low = gen_lowpart (SImode, out);
29357 rtx in_high = gen_highpart (SImode, in);
29358 rtx in_low = gen_lowpart (SImode, in);
29359
29360 /* Terminology:
29361 in = the register pair containing the input value.
29362 out = the destination register pair.
29363 up = the high- or low-part of each pair.
29364 down = the opposite part to "up".
29365 In a shift, we can consider bits to shift from "up"-stream to
29366 "down"-stream, so in a left-shift "up" is the low-part and "down"
29367 is the high-part of each register pair. */
29368
29369 rtx out_up = code == ASHIFT ? out_low : out_high;
29370 rtx out_down = code == ASHIFT ? out_high : out_low;
29371 rtx in_up = code == ASHIFT ? in_low : in_high;
29372 rtx in_down = code == ASHIFT ? in_high : in_low;
29373
29374 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29375 gcc_assert (out
29376 && (REG_P (out) || GET_CODE (out) == SUBREG)
29377 && GET_MODE (out) == DImode);
29378 gcc_assert (in
29379 && (REG_P (in) || GET_CODE (in) == SUBREG)
29380 && GET_MODE (in) == DImode);
29381 gcc_assert (amount
29382 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29383 && GET_MODE (amount) == SImode)
29384 || CONST_INT_P (amount)));
29385 gcc_assert (scratch1 == NULL
29386 || (GET_CODE (scratch1) == SCRATCH)
29387 || (GET_MODE (scratch1) == SImode
29388 && REG_P (scratch1)));
29389 gcc_assert (scratch2 == NULL
29390 || (GET_CODE (scratch2) == SCRATCH)
29391 || (GET_MODE (scratch2) == SImode
29392 && REG_P (scratch2)));
29393 gcc_assert (!REG_P (out) || !REG_P (amount)
29394 || !HARD_REGISTER_P (out)
29395 || (REGNO (out) != REGNO (amount)
29396 && REGNO (out) + 1 != REGNO (amount)));
29397
29398 /* Macros to make following code more readable. */
29399 #define SUB_32(DEST,SRC) \
29400 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29401 #define RSB_32(DEST,SRC) \
29402 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29403 #define SUB_S_32(DEST,SRC) \
29404 gen_addsi3_compare0 ((DEST), (SRC), \
29405 GEN_INT (-32))
29406 #define SET(DEST,SRC) \
29407 gen_rtx_SET ((DEST), (SRC))
29408 #define SHIFT(CODE,SRC,AMOUNT) \
29409 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29410 #define LSHIFT(CODE,SRC,AMOUNT) \
29411 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29412 SImode, (SRC), (AMOUNT))
29413 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29414 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29415 SImode, (SRC), (AMOUNT))
29416 #define ORR(A,B) \
29417 gen_rtx_IOR (SImode, (A), (B))
29418 #define BRANCH(COND,LABEL) \
29419 gen_arm_cond_branch ((LABEL), \
29420 gen_rtx_ ## COND (CCmode, cc_reg, \
29421 const0_rtx), \
29422 cc_reg)
29423
29424 /* Shifts by register and shifts by constant are handled separately. */
29425 if (CONST_INT_P (amount))
29426 {
29427 /* We have a shift-by-constant. */
29428
29429 /* First, handle out-of-range shift amounts.
29430 In both cases we try to match the result an ARM instruction in a
29431 shift-by-register would give. This helps reduce execution
29432 differences between optimization levels, but it won't stop other
29433 parts of the compiler doing different things. This is "undefined
29434 behavior", in any case. */
29435 if (INTVAL (amount) <= 0)
29436 emit_insn (gen_movdi (out, in));
29437 else if (INTVAL (amount) >= 64)
29438 {
29439 if (code == ASHIFTRT)
29440 {
29441 rtx const31_rtx = GEN_INT (31);
29442 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29443 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29444 }
29445 else
29446 emit_insn (gen_movdi (out, const0_rtx));
29447 }
29448
29449 /* Now handle valid shifts. */
29450 else if (INTVAL (amount) < 32)
29451 {
29452 /* Shifts by a constant less than 32. */
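	  /* For example, a logical right shift by 10 becomes:
	       out_low  = in_low >> 10;
	       out_low |= in_high << 22;
	       out_high = in_high >> 10;  */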
29453 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29454
29455 /* Clearing the out register in DImode first avoids lots
29456 of spilling and results in less stack usage.
29457 Later this redundant insn is completely removed.
29458 Do that only if "in" and "out" are different registers. */
29459 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29460 emit_insn (SET (out, const0_rtx));
29461 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29462 emit_insn (SET (out_down,
29463 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29464 out_down)));
29465 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29466 }
29467 else
29468 {
29469 /* Shifts by a constant greater than 31. */
29470 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29471
29472 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29473 emit_insn (SET (out, const0_rtx));
29474 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29475 if (code == ASHIFTRT)
29476 emit_insn (gen_ashrsi3 (out_up, in_up,
29477 GEN_INT (31)));
29478 else
29479 emit_insn (SET (out_up, const0_rtx));
29480 }
29481 }
29482 else
29483 {
29484 /* We have a shift-by-register. */
29485 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29486
29487 /* This alternative requires the scratch registers. */
29488 gcc_assert (scratch1 && REG_P (scratch1));
29489 gcc_assert (scratch2 && REG_P (scratch2));
29490
29491 /* We will need the values "amount-32" and "32-amount" later.
29492 Swapping them around now allows the later code to be more general. */
29493 switch (code)
29494 {
29495 case ASHIFT:
29496 emit_insn (SUB_32 (scratch1, amount));
29497 emit_insn (RSB_32 (scratch2, amount));
29498 break;
29499 case ASHIFTRT:
29500 emit_insn (RSB_32 (scratch1, amount));
29501 /* Also set CC = amount > 32. */
29502 emit_insn (SUB_S_32 (scratch2, amount));
29503 break;
29504 case LSHIFTRT:
29505 emit_insn (RSB_32 (scratch1, amount));
29506 emit_insn (SUB_32 (scratch2, amount));
29507 break;
29508 default:
29509 gcc_unreachable ();
29510 }
29511
29512 /* Emit code like this:
29513
29514 arithmetic-left:
29515 out_down = in_down << amount;
29516 out_down = (in_up << (amount - 32)) | out_down;
29517 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29518 out_up = in_up << amount;
29519
29520 arithmetic-right:
29521 out_down = in_down >> amount;
29522 out_down = (in_up << (32 - amount)) | out_down;
29523 if (amount < 32)
29524 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29525 out_up = in_up << amount;
29526
29527 logical-right:
29528 out_down = in_down >> amount;
29529 out_down = (in_up << (32 - amount)) | out_down;
29530 if (amount < 32)
29531 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29532 out_up = in_up << amount;
29533
29534 The ARM and Thumb2 variants are the same but implemented slightly
29535 differently. If this were only called during expand we could just
29536 use the Thumb2 case and let combine do the right thing, but this
29537 can also be called from post-reload splitters. */
29538
29539 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29540
29541 if (!TARGET_THUMB2)
29542 {
29543 /* Emit code for ARM mode. */
29544 emit_insn (SET (out_down,
29545 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29546 if (code == ASHIFTRT)
29547 {
29548 rtx_code_label *done_label = gen_label_rtx ();
29549 emit_jump_insn (BRANCH (LT, done_label));
29550 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29551 out_down)));
29552 emit_label (done_label);
29553 }
29554 else
29555 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29556 out_down)));
29557 }
29558 else
29559 {
29560 /* Emit code for Thumb2 mode.
29561 Thumb2 can't do shift and or in one insn. */
29562 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29563 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29564
29565 if (code == ASHIFTRT)
29566 {
29567 rtx_code_label *done_label = gen_label_rtx ();
29568 emit_jump_insn (BRANCH (LT, done_label));
29569 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29570 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29571 emit_label (done_label);
29572 }
29573 else
29574 {
29575 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29576 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29577 }
29578 }
29579
29580 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29581 }
29582
29583 #undef SUB_32
29584 #undef RSB_32
29585 #undef SUB_S_32
29586 #undef SET
29587 #undef SHIFT
29588 #undef LSHIFT
29589 #undef REV_LSHIFT
29590 #undef ORR
29591 #undef BRANCH
29592 }
29593
29594 /* Returns true if the pattern is a valid symbolic address, which is either a
29595 symbol_ref or (symbol_ref + addend).
29596
29597 According to the ARM ELF ABI, the initial addend of REL-type relocations
29598 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29599 literal field of the instruction as a 16-bit signed value in the range
29600 -32768 <= A < 32768. */
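/* For example, (const (plus (symbol_ref "sym") (const_int 0x7fff))) is
   accepted below, while an addend of 0x8000 is rejected.  */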
29601
29602 bool
29603 arm_valid_symbolic_address_p (rtx addr)
29604 {
29605 rtx xop0, xop1 = NULL_RTX;
29606 rtx tmp = addr;
29607
29608 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29609 return true;
29610
29611 /* (const (plus: symbol_ref const_int)) */
29612 if (GET_CODE (addr) == CONST)
29613 tmp = XEXP (addr, 0);
29614
29615 if (GET_CODE (tmp) == PLUS)
29616 {
29617 xop0 = XEXP (tmp, 0);
29618 xop1 = XEXP (tmp, 1);
29619
29620 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29621 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29622 }
29623
29624 return false;
29625 }
29626
29627 /* Returns true if *COMPARISON is a valid comparison operation, and
29628 puts the operands into a form that is valid for it. */
29629 bool
29630 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29631 {
29632 enum rtx_code code = GET_CODE (*comparison);
29633 int code_int;
29634 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29635 ? GET_MODE (*op2) : GET_MODE (*op1);
29636
29637 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29638
29639 if (code == UNEQ || code == LTGT)
29640 return false;
29641
29642 code_int = (int)code;
29643 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29644 PUT_CODE (*comparison, (enum rtx_code)code_int);
29645
29646 switch (mode)
29647 {
29648 case E_SImode:
29649 if (!arm_add_operand (*op1, mode))
29650 *op1 = force_reg (mode, *op1);
29651 if (!arm_add_operand (*op2, mode))
29652 *op2 = force_reg (mode, *op2);
29653 return true;
29654
29655 case E_DImode:
29656 if (!cmpdi_operand (*op1, mode))
29657 *op1 = force_reg (mode, *op1);
29658 if (!cmpdi_operand (*op2, mode))
29659 *op2 = force_reg (mode, *op2);
29660 return true;
29661
29662 case E_HFmode:
29663 if (!TARGET_VFP_FP16INST)
29664 break;
29665 /* FP16 comparisons are done in SF mode. */
29666 mode = SFmode;
29667 *op1 = convert_to_mode (mode, *op1, 1);
29668 *op2 = convert_to_mode (mode, *op2, 1);
29669 /* Fall through. */
29670 case E_SFmode:
29671 case E_DFmode:
29672 if (!vfp_compare_operand (*op1, mode))
29673 *op1 = force_reg (mode, *op1);
29674 if (!vfp_compare_operand (*op2, mode))
29675 *op2 = force_reg (mode, *op2);
29676 return true;
29677 default:
29678 break;
29679 }
29680
29681 return false;
29682
29683 }
29684
29685 /* Maximum number of instructions to set block of memory. */
29686 static int
29687 arm_block_set_max_insns (void)
29688 {
29689 if (optimize_function_for_size_p (cfun))
29690 return 4;
29691 else
29692 return current_tune->max_insns_inline_memset;
29693 }
29694
29695 /* Return TRUE if it's profitable to set block of memory for
29696 non-vectorized case. VAL is the value to set the memory
29697 with. LENGTH is the number of bytes to set. ALIGN is the
29698 alignment of the destination memory in bytes. UNALIGNED_P
29699 is TRUE if we can only set the memory with instructions
29700 meeting alignment requirements. USE_STRD_P is TRUE if we
29701 can use strd to set the memory. */
29702 static bool
29703 arm_block_set_non_vect_profit_p (rtx val,
29704 unsigned HOST_WIDE_INT length,
29705 unsigned HOST_WIDE_INT align,
29706 bool unaligned_p, bool use_strd_p)
29707 {
29708 int num = 0;
29709 /* For a leftover of 0-7 bytes, we can set the memory block using
29710 strb/strh/str with the minimum number of instructions. */
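  /* For example, 7 leftover bytes take str + strh + strb, hence
     leftover[7] == 3.  */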
29711 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29712
29713 if (unaligned_p)
29714 {
29715 num = arm_const_inline_cost (SET, val);
29716 num += length / align + length % align;
29717 }
29718 else if (use_strd_p)
29719 {
29720 num = arm_const_double_inline_cost (val);
29721 num += (length >> 3) + leftover[length & 7];
29722 }
29723 else
29724 {
29725 num = arm_const_inline_cost (SET, val);
29726 num += (length >> 2) + leftover[length & 3];
29727 }
29728
29729 /* We may be able to combine the last STRH/STRB pair into a single STR
29730 by shifting one byte back. */
29731 if (unaligned_access && length > 3 && (length & 3) == 3)
29732 num--;
29733
29734 return (num <= arm_block_set_max_insns ());
29735 }
29736
29737 /* Return TRUE if it's profitable to set block of memory for
29738 vectorized case. LENGTH is the number of bytes to set.
29739 ALIGN is the alignment of destination memory in bytes.
29740 MODE is the vector mode used to set the memory. */
29741 static bool
29742 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29743 unsigned HOST_WIDE_INT align,
29744 machine_mode mode)
29745 {
29746 int num;
29747 bool unaligned_p = ((align & 3) != 0);
29748 unsigned int nelt = GET_MODE_NUNITS (mode);
29749
29750 /* Instruction loading constant value. */
29751 num = 1;
29752 /* Instructions storing the memory. */
29753 num += (length + nelt - 1) / nelt;
29754 /* Instructions adjusting the address expression. We only need to
29755 adjust the address expression if it's 4-byte aligned and the
29756 leftover bytes can only be stored by a misaligned store instruction. */
29757 if (!unaligned_p && (length & 3) != 0)
29758 num++;
29759
29760 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29761 if (!unaligned_p && mode == V16QImode)
29762 num--;
29763
29764 return (num <= arm_block_set_max_insns ());
29765 }
29766
29767 /* Set a block of memory using vectorization instructions for the
29768 unaligned case. We fill the first LENGTH bytes of the memory
29769 area starting from DSTBASE with byte constant VALUE. ALIGN is
29770 the alignment requirement of memory. Return TRUE if succeeded. */
29771 static bool
29772 arm_block_set_unaligned_vect (rtx dstbase,
29773 unsigned HOST_WIDE_INT length,
29774 unsigned HOST_WIDE_INT value,
29775 unsigned HOST_WIDE_INT align)
29776 {
29777 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29778 rtx dst, mem;
29779 rtx val_elt, val_vec, reg;
29780 rtx rval[MAX_VECT_LEN];
29781 rtx (*gen_func) (rtx, rtx);
29782 machine_mode mode;
29783 unsigned HOST_WIDE_INT v = value;
29784 unsigned int offset = 0;
29785 gcc_assert ((align & 0x3) != 0);
29786 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29787 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29788 if (length >= nelt_v16)
29789 {
29790 mode = V16QImode;
29791 gen_func = gen_movmisalignv16qi;
29792 }
29793 else
29794 {
29795 mode = V8QImode;
29796 gen_func = gen_movmisalignv8qi;
29797 }
29798 nelt_mode = GET_MODE_NUNITS (mode);
29799 gcc_assert (length >= nelt_mode);
29800 /* Skip if it isn't profitable. */
29801 if (!arm_block_set_vect_profit_p (length, align, mode))
29802 return false;
29803
29804 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29805 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29806
29807 v = sext_hwi (v, BITS_PER_WORD);
29808 val_elt = GEN_INT (v);
29809 for (j = 0; j < nelt_mode; j++)
29810 rval[j] = val_elt;
29811
29812 reg = gen_reg_rtx (mode);
29813 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29814 /* Emit instruction loading the constant value. */
29815 emit_move_insn (reg, val_vec);
29816
29817 /* Handle nelt_mode bytes in a vector. */
29818 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29819 {
29820 emit_insn ((*gen_func) (mem, reg));
29821 if (i + 2 * nelt_mode <= length)
29822 {
29823 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29824 offset += nelt_mode;
29825 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29826 }
29827 }
29828
29829 /* If at least nelt_v8 bytes are left over, we must be in
29830 V16QI mode. */
29831 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29832
29833 /* Handle (8, 16) bytes leftover. */
29834 if (i + nelt_v8 < length)
29835 {
29836 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29837 offset += length - i;
29838 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29839
29840 /* We are shifting bytes back, set the alignment accordingly. */
29841 if ((length & 1) != 0 && align >= 2)
29842 set_mem_align (mem, BITS_PER_UNIT);
29843
29844 emit_insn (gen_movmisalignv16qi (mem, reg));
29845 }
29846 /* Handle (0, 8] bytes leftover. */
29847 else if (i < length && i + nelt_v8 >= length)
29848 {
29849 if (mode == V16QImode)
29850 reg = gen_lowpart (V8QImode, reg);
29851
29852 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29853 + (nelt_mode - nelt_v8))));
29854 offset += (length - i) + (nelt_mode - nelt_v8);
29855 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29856
29857 /* We are shifting bytes back, set the alignment accordingly. */
29858 if ((length & 1) != 0 && align >= 2)
29859 set_mem_align (mem, BITS_PER_UNIT);
29860
29861 emit_insn (gen_movmisalignv8qi (mem, reg));
29862 }
29863
29864 return true;
29865 }
29866
29867 /* Set a block of memory using vectorization instructions for the
29868 aligned case. We fill the first LENGTH bytes of the memory area
29869 starting from DSTBASE with byte constant VALUE. ALIGN is the
29870 alignment requirement of memory. Return TRUE if succeeded. */
29871 static bool
29872 arm_block_set_aligned_vect (rtx dstbase,
29873 unsigned HOST_WIDE_INT length,
29874 unsigned HOST_WIDE_INT value,
29875 unsigned HOST_WIDE_INT align)
29876 {
29877 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29878 rtx dst, addr, mem;
29879 rtx val_elt, val_vec, reg;
29880 rtx rval[MAX_VECT_LEN];
29881 machine_mode mode;
29882 unsigned HOST_WIDE_INT v = value;
29883 unsigned int offset = 0;
29884
29885 gcc_assert ((align & 0x3) == 0);
29886 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29887 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29888 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29889 mode = V16QImode;
29890 else
29891 mode = V8QImode;
29892
29893 nelt_mode = GET_MODE_NUNITS (mode);
29894 gcc_assert (length >= nelt_mode);
29895 /* Skip if it isn't profitable. */
29896 if (!arm_block_set_vect_profit_p (length, align, mode))
29897 return false;
29898
29899 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29900
29901 v = sext_hwi (v, BITS_PER_WORD);
29902 val_elt = GEN_INT (v);
29903 for (j = 0; j < nelt_mode; j++)
29904 rval[j] = val_elt;
29905
29906 reg = gen_reg_rtx (mode);
29907 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29908 /* Emit instruction loading the constant value. */
29909 emit_move_insn (reg, val_vec);
29910
29911 i = 0;
29912 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29913 if (mode == V16QImode)
29914 {
29915 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29916 emit_insn (gen_movmisalignv16qi (mem, reg));
29917 i += nelt_mode;
29918 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29919 if (i + nelt_v8 < length && i + nelt_v16 > length)
29920 {
29921 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29922 offset += length - nelt_mode;
29923 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29924 /* We are shifting bytes back, set the alignment accordingly. */
29925 if ((length & 0x3) == 0)
29926 set_mem_align (mem, BITS_PER_UNIT * 4);
29927 else if ((length & 0x1) == 0)
29928 set_mem_align (mem, BITS_PER_UNIT * 2);
29929 else
29930 set_mem_align (mem, BITS_PER_UNIT);
29931
29932 emit_insn (gen_movmisalignv16qi (mem, reg));
29933 return true;
29934 }
29935 /* Fall through for bytes leftover. */
29936 mode = V8QImode;
29937 nelt_mode = GET_MODE_NUNITS (mode);
29938 reg = gen_lowpart (V8QImode, reg);
29939 }
29940
29941 /* Handle 8 bytes in a vector. */
29942 for (; (i + nelt_mode <= length); i += nelt_mode)
29943 {
29944 addr = plus_constant (Pmode, dst, i);
29945 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29946 emit_move_insn (mem, reg);
29947 }
29948
29949 /* Handle single word leftover by shifting 4 bytes back. We can
29950 use aligned access for this case. */
29951 if (i + UNITS_PER_WORD == length)
29952 {
29953 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29954 offset += i - UNITS_PER_WORD;
29955 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29956 /* We are shifting 4 bytes back, set the alignment accordingly. */
29957 if (align > UNITS_PER_WORD)
29958 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29959
29960 emit_move_insn (mem, reg);
29961 }
29962 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29963 We have to use unaligned access for this case. */
29964 else if (i < length)
29965 {
29966 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29967 offset += length - nelt_mode;
29968 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29969 /* We are shifting bytes back, set the alignment accordingly. */
29970 if ((length & 1) == 0)
29971 set_mem_align (mem, BITS_PER_UNIT * 2);
29972 else
29973 set_mem_align (mem, BITS_PER_UNIT);
29974
29975 emit_insn (gen_movmisalignv8qi (mem, reg));
29976 }
29977
29978 return true;
29979 }
29980
29981 /* Set a block of memory using plain strh/strb instructions, only
29982 using instructions allowed by ALIGN on the processor. We fill the
29983 first LENGTH bytes of the memory area starting from DSTBASE
29984 with byte constant VALUE. ALIGN is the alignment requirement
29985 of memory. */
29986 static bool
29987 arm_block_set_unaligned_non_vect (rtx dstbase,
29988 unsigned HOST_WIDE_INT length,
29989 unsigned HOST_WIDE_INT value,
29990 unsigned HOST_WIDE_INT align)
29991 {
29992 unsigned int i;
29993 rtx dst, addr, mem;
29994 rtx val_exp, val_reg, reg;
29995 machine_mode mode;
29996 HOST_WIDE_INT v = value;
29997
29998 gcc_assert (align == 1 || align == 2);
29999
30000 if (align == 2)
30001 v |= (value << BITS_PER_UNIT);
30002
30003 v = sext_hwi (v, BITS_PER_WORD);
30004 val_exp = GEN_INT (v);
30005 /* Skip if it isn't profitable. */
30006 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30007 align, true, false))
30008 return false;
30009
30010 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30011 mode = (align == 2 ? HImode : QImode);
30012 val_reg = force_reg (SImode, val_exp);
30013 reg = gen_lowpart (mode, val_reg);
30014
30015 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30016 {
30017 addr = plus_constant (Pmode, dst, i);
30018 mem = adjust_automodify_address (dstbase, mode, addr, i);
30019 emit_move_insn (mem, reg);
30020 }
30021
30022 /* Handle single byte leftover. */
30023 if (i + 1 == length)
30024 {
30025 reg = gen_lowpart (QImode, val_reg);
30026 addr = plus_constant (Pmode, dst, i);
30027 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30028 emit_move_insn (mem, reg);
30029 i++;
30030 }
30031
30032 gcc_assert (i == length);
30033 return true;
30034 }
30035
30036 /* Set a block of memory using plain strd/str/strh/strb instructions,
30037 to permit unaligned stores on processors which support unaligned
30038 semantics for those instructions. We fill the first LENGTH bytes
30039 of the memory area starting from DSTBASE with byte constant VALUE.
30040 ALIGN is the alignment requirement of memory. */
30041 static bool
30042 arm_block_set_aligned_non_vect (rtx dstbase,
30043 unsigned HOST_WIDE_INT length,
30044 unsigned HOST_WIDE_INT value,
30045 unsigned HOST_WIDE_INT align)
30046 {
30047 unsigned int i;
30048 rtx dst, addr, mem;
30049 rtx val_exp, val_reg, reg;
30050 unsigned HOST_WIDE_INT v;
30051 bool use_strd_p;
30052
30053 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30054 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30055
30056 v = (value | (value << 8) | (value << 16) | (value << 24));
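  /* For example, a VALUE of 0xab becomes 0xabababab here (and is widened
     further to 0xabababababababab below when strd can be used).  */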
30057 if (length < UNITS_PER_WORD)
30058 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30059
30060 if (use_strd_p)
30061 v |= (v << BITS_PER_WORD);
30062 else
30063 v = sext_hwi (v, BITS_PER_WORD);
30064
30065 val_exp = GEN_INT (v);
30066 /* Skip if it isn't profitable. */
30067 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30068 align, false, use_strd_p))
30069 {
30070 if (!use_strd_p)
30071 return false;
30072
30073 /* Try without strd. */
30074 v = (v >> BITS_PER_WORD);
30075 v = sext_hwi (v, BITS_PER_WORD);
30076 val_exp = GEN_INT (v);
30077 use_strd_p = false;
30078 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30079 align, false, use_strd_p))
30080 return false;
30081 }
30082
30083 i = 0;
30084 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30085 /* Handle double words using strd if possible. */
30086 if (use_strd_p)
30087 {
30088 val_reg = force_reg (DImode, val_exp);
30089 reg = val_reg;
30090 for (; (i + 8 <= length); i += 8)
30091 {
30092 addr = plus_constant (Pmode, dst, i);
30093 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30094 emit_move_insn (mem, reg);
30095 }
30096 }
30097 else
30098 val_reg = force_reg (SImode, val_exp);
30099
30100 /* Handle words. */
30101 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30102 for (; (i + 4 <= length); i += 4)
30103 {
30104 addr = plus_constant (Pmode, dst, i);
30105 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30106 if ((align & 3) == 0)
30107 emit_move_insn (mem, reg);
30108 else
30109 emit_insn (gen_unaligned_storesi (mem, reg));
30110 }
30111
30112 /* Merge last pair of STRH and STRB into a STR if possible. */
30113 if (unaligned_access && i > 0 && (i + 3) == length)
30114 {
30115 addr = plus_constant (Pmode, dst, i - 1);
30116 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30117 /* We are shifting one byte back, set the alignment accordingly. */
30118 if ((align & 1) == 0)
30119 set_mem_align (mem, BITS_PER_UNIT);
30120
30121 /* Most likely this is an unaligned access, and we can't tell at
30122 compilation time. */
30123 emit_insn (gen_unaligned_storesi (mem, reg));
30124 return true;
30125 }
30126
30127 /* Handle half word leftover. */
30128 if (i + 2 <= length)
30129 {
30130 reg = gen_lowpart (HImode, val_reg);
30131 addr = plus_constant (Pmode, dst, i);
30132 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30133 if ((align & 1) == 0)
30134 emit_move_insn (mem, reg);
30135 else
30136 emit_insn (gen_unaligned_storehi (mem, reg));
30137
30138 i += 2;
30139 }
30140
30141 /* Handle single byte leftover. */
30142 if (i + 1 == length)
30143 {
30144 reg = gen_lowpart (QImode, val_reg);
30145 addr = plus_constant (Pmode, dst, i);
30146 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30147 emit_move_insn (mem, reg);
30148 }
30149
30150 return true;
30151 }
30152
30153 /* Set a block of memory using vectorization instructions for both
30154 aligned and unaligned cases. We fill the first LENGTH bytes of
30155 the memory area starting from DSTBASE with byte constant VALUE.
30156 ALIGN is the alignment requirement of memory. */
30157 static bool
30158 arm_block_set_vect (rtx dstbase,
30159 unsigned HOST_WIDE_INT length,
30160 unsigned HOST_WIDE_INT value,
30161 unsigned HOST_WIDE_INT align)
30162 {
30163 /* Check whether we need to use unaligned store instruction. */
30164 if (((align & 3) != 0 || (length & 3) != 0)
30165 /* Check whether unaligned store instruction is available. */
30166 && (!unaligned_access || BYTES_BIG_ENDIAN))
30167 return false;
30168
30169 if ((align & 3) == 0)
30170 return arm_block_set_aligned_vect (dstbase, length, value, align);
30171 else
30172 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30173 }
30174
30175 /* Expand a string store operation. First we try to do it using
30176 vectorization instructions, then with ARM unaligned access and
30177 double-word stores if profitable. OPERANDS[0] is the destination,
30178 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30179 initialize the memory with, OPERANDS[3] is the known alignment of the
30180 destination. */
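/* For example (illustrative), a call such as memset (p, 0xab, 41) with P
   known to be word-aligned would arrive here with OPERANDS[1] == 41,
   OPERANDS[2] == 0xab and OPERANDS[3] == 4 when the middle end chooses to
   expand it inline.  */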
30181 bool
30182 arm_gen_setmem (rtx *operands)
30183 {
30184 rtx dstbase = operands[0];
30185 unsigned HOST_WIDE_INT length;
30186 unsigned HOST_WIDE_INT value;
30187 unsigned HOST_WIDE_INT align;
30188
30189 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30190 return false;
30191
30192 length = UINTVAL (operands[1]);
30193 if (length > 64)
30194 return false;
30195
30196 value = (UINTVAL (operands[2]) & 0xFF);
30197 align = UINTVAL (operands[3]);
30198 if (TARGET_NEON && length >= 8
30199 && current_tune->string_ops_prefer_neon
30200 && arm_block_set_vect (dstbase, length, value, align))
30201 return true;
30202
30203 if (!unaligned_access && (align & 3) != 0)
30204 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30205
30206 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30207 }
30208
30209
30210 static bool
30211 arm_macro_fusion_p (void)
30212 {
30213 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30214 }
30215
30216 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30217 for MOVW / MOVT macro fusion. */
30218
30219 static bool
30220 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30221 {
30222 /* We are trying to fuse
30223 movw imm / movt imm
30224 instructions as a group that gets scheduled together. */
30225
30226 rtx set_dest = SET_DEST (curr_set);
30227
30228 if (GET_MODE (set_dest) != SImode)
30229 return false;
30230
30231 /* We are trying to match:
30232 prev (movw) == (set (reg r0) (const_int imm16))
30233 curr (movt) == (set (zero_extract (reg r0)
30234 (const_int 16)
30235 (const_int 16))
30236 (const_int imm16_1))
30237 or
30238 prev (movw) == (set (reg r1)
30239 (high (symbol_ref ("SYM"))))
30240 curr (movt) == (set (reg r0)
30241 (lo_sum (reg r1)
30242 (symbol_ref ("SYM")))) */
30243
30244 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30245 {
30246 if (CONST_INT_P (SET_SRC (curr_set))
30247 && CONST_INT_P (SET_SRC (prev_set))
30248 && REG_P (XEXP (set_dest, 0))
30249 && REG_P (SET_DEST (prev_set))
30250 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30251 return true;
30252
30253 }
30254 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30255 && REG_P (SET_DEST (curr_set))
30256 && REG_P (SET_DEST (prev_set))
30257 && GET_CODE (SET_SRC (prev_set)) == HIGH
30258 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30259 return true;
30260
30261 return false;
30262 }
30263
30264 static bool
30265 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30266 {
30267 rtx prev_set = single_set (prev);
30268 rtx curr_set = single_set (curr);
30269
30270 if (!prev_set
30271 || !curr_set)
30272 return false;
30273
30274 if (any_condjump_p (curr))
30275 return false;
30276
30277 if (!arm_macro_fusion_p ())
30278 return false;
30279
30280 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30281 && aarch_crypto_can_dual_issue (prev, curr))
30282 return true;
30283
30284 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30285 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30286 return true;
30287
30288 return false;
30289 }
30290
30291 /* Return true iff the instruction fusion described by OP is enabled. */
30292 bool
30293 arm_fusion_enabled_p (tune_params::fuse_ops op)
30294 {
30295 return current_tune->fusible_ops & op;
30296 }
30297
30298 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30299 scheduled for speculative execution. Reject the long-running division
30300 and square-root instructions. */
30301
30302 static bool
30303 arm_sched_can_speculate_insn (rtx_insn *insn)
30304 {
30305 switch (get_attr_type (insn))
30306 {
30307 case TYPE_SDIV:
30308 case TYPE_UDIV:
30309 case TYPE_FDIVS:
30310 case TYPE_FDIVD:
30311 case TYPE_FSQRTS:
30312 case TYPE_FSQRTD:
30313 case TYPE_NEON_FP_SQRT_S:
30314 case TYPE_NEON_FP_SQRT_D:
30315 case TYPE_NEON_FP_SQRT_S_Q:
30316 case TYPE_NEON_FP_SQRT_D_Q:
30317 case TYPE_NEON_FP_DIV_S:
30318 case TYPE_NEON_FP_DIV_D:
30319 case TYPE_NEON_FP_DIV_S_Q:
30320 case TYPE_NEON_FP_DIV_D_Q:
30321 return false;
30322 default:
30323 return true;
30324 }
30325 }
30326
30327 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30328
30329 static unsigned HOST_WIDE_INT
30330 arm_asan_shadow_offset (void)
30331 {
30332 return HOST_WIDE_INT_1U << 29;
30333 }
30334
30335
30336 /* This is a temporary fix for PR60655. Ideally we need
30337 to handle most of these cases in the generic part but
30338 currently we reject minus (..) (sym_ref). We try to
30339 ameliorate the case with minus (sym_ref1) (sym_ref2)
30340 where they are in the same section. */
30341
30342 static bool
30343 arm_const_not_ok_for_debug_p (rtx p)
30344 {
30345 tree decl_op0 = NULL;
30346 tree decl_op1 = NULL;
30347
30348 if (GET_CODE (p) == MINUS)
30349 {
30350 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30351 {
30352 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30353 if (decl_op1
30354 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30355 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30356 {
30357 if ((VAR_P (decl_op1)
30358 || TREE_CODE (decl_op1) == CONST_DECL)
30359 && (VAR_P (decl_op0)
30360 || TREE_CODE (decl_op0) == CONST_DECL))
30361 return (get_variable_section (decl_op1, false)
30362 != get_variable_section (decl_op0, false));
30363
30364 if (TREE_CODE (decl_op1) == LABEL_DECL
30365 && TREE_CODE (decl_op0) == LABEL_DECL)
30366 return (DECL_CONTEXT (decl_op1)
30367 != DECL_CONTEXT (decl_op0));
30368 }
30369
30370 return true;
30371 }
30372 }
30373
30374 return false;
30375 }
30376
30377 /* Return TRUE if X is a reference to a value in a constant pool. */
30378 extern bool
30379 arm_is_constant_pool_ref (rtx x)
30380 {
30381 return (MEM_P (x)
30382 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30383 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30384 }
30385
30386 /* Remember the last target of arm_set_current_function. */
30387 static GTY(()) tree arm_previous_fndecl;
30388
30389 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30390
30391 void
30392 save_restore_target_globals (tree new_tree)
30393 {
30394 /* If we have a previous state, use it. */
30395 if (TREE_TARGET_GLOBALS (new_tree))
30396 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30397 else if (new_tree == target_option_default_node)
30398 restore_target_globals (&default_target_globals);
30399 else
30400 {
30401 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30402 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30403 }
30404
30405 arm_option_params_internal ();
30406 }
30407
30408 /* Invalidate arm_previous_fndecl. */
30409
30410 void
30411 arm_reset_previous_fndecl (void)
30412 {
30413 arm_previous_fndecl = NULL_TREE;
30414 }
30415
30416 /* Establish appropriate back-end context for processing the function
30417 FNDECL. The argument might be NULL to indicate processing at top
30418 level, outside of any function scope. */
30419
30420 static void
30421 arm_set_current_function (tree fndecl)
30422 {
30423 if (!fndecl || fndecl == arm_previous_fndecl)
30424 return;
30425
30426 tree old_tree = (arm_previous_fndecl
30427 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30428 : NULL_TREE);
30429
30430 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30431
30432 /* If the current function has no attributes but the previous one did,
30433 use the default node. */
30434 if (! new_tree && old_tree)
30435 new_tree = target_option_default_node;
30436
30437 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC
30438 pop to the default have been handled by save_restore_target_globals
30439 from arm_pragma_target_parse. */
30440 if (old_tree == new_tree)
30441 return;
30442
30443 arm_previous_fndecl = fndecl;
30444
30445 /* First set the target options. */
30446 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30447
30448 save_restore_target_globals (new_tree);
30449 }
30450
30451 /* Implement TARGET_OPTION_PRINT. */
30452
30453 static void
30454 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30455 {
30456 int flags = ptr->x_target_flags;
30457 const char *fpu_name;
30458
30459 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30460 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30461
30462 fprintf (file, "%*sselected isa %s\n", indent, "",
30463 TARGET_THUMB2_P (flags) ? "thumb2" :
30464 TARGET_THUMB_P (flags) ? "thumb1" :
30465 "arm");
30466
30467 if (ptr->x_arm_arch_string)
30468 fprintf (file, "%*sselected architecture %s\n", indent, "",
30469 ptr->x_arm_arch_string);
30470
30471 if (ptr->x_arm_cpu_string)
30472 fprintf (file, "%*sselected CPU %s\n", indent, "",
30473 ptr->x_arm_cpu_string);
30474
30475 if (ptr->x_arm_tune_string)
30476 fprintf (file, "%*sselected tune %s\n", indent, "",
30477 ptr->x_arm_tune_string);
30478
30479 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30480 }
30481
30482 /* Hook to determine if one function can safely inline another. */
30483
30484 static bool
30485 arm_can_inline_p (tree caller, tree callee)
30486 {
30487 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30488 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30489 bool can_inline = true;
30490
30491 struct cl_target_option *caller_opts
30492 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30493 : target_option_default_node);
30494
30495 struct cl_target_option *callee_opts
30496 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30497 : target_option_default_node);
30498
30499 if (callee_opts == caller_opts)
30500 return true;
30501
30502 /* Callee's ISA features should be a subset of the caller's. */
30503 struct arm_build_target caller_target;
30504 struct arm_build_target callee_target;
30505 caller_target.isa = sbitmap_alloc (isa_num_bits);
30506 callee_target.isa = sbitmap_alloc (isa_num_bits);
30507
30508 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30509 false);
30510 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30511 false);
30512 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30513 can_inline = false;
30514
30515 sbitmap_free (caller_target.isa);
30516 sbitmap_free (callee_target.isa);
30517
30518 /* OK to inline between different modes.
30519 Function with mode specific instructions, e.g using asm,
30520 must be explicitly protected with noinline. */
30521 return can_inline;
30522 }
30523
30524 /* Hook to fix function's alignment affected by target attribute. */
30525
30526 static void
30527 arm_relayout_function (tree fndecl)
30528 {
30529 if (DECL_USER_ALIGN (fndecl))
30530 return;
30531
30532 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30533
30534 if (!callee_tree)
30535 callee_tree = target_option_default_node;
30536
30537 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30538 SET_DECL_ALIGN
30539 (fndecl,
30540 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30541 }
30542
30543 /* Inner function to process attribute((target(...))). Take the attribute
30544 argument and set the current options from it. If the argument is a
30545 list, recurse over each element. */
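/* The argument is a comma-separated list of the forms handled below, for
   example (an illustrative declaration, not taken from real code):

     int f (int) __attribute__ ((target ("thumb,fpu=vfpv3-d16")));

   "thumb"/"arm" toggle MASK_THUMB and "fpu=<name>" selects a specific FPU;
   anything else is rejected with an error.  */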
30546
30547 static bool
30548 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30549 {
30550 if (TREE_CODE (args) == TREE_LIST)
30551 {
30552 bool ret = true;
30553
30554 for (; args; args = TREE_CHAIN (args))
30555 if (TREE_VALUE (args)
30556 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30557 ret = false;
30558 return ret;
30559 }
30560
30561 else if (TREE_CODE (args) != STRING_CST)
30562 {
30563 error ("attribute %<target%> argument not a string");
30564 return false;
30565 }
30566
30567 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30568 char *q;
30569
30570 while ((q = strtok (argstr, ",")) != NULL)
30571 {
30572 while (ISSPACE (*q)) ++q;
30573
30574 argstr = NULL;
30575 if (!strncmp (q, "thumb", 5))
30576 opts->x_target_flags |= MASK_THUMB;
30577
30578 else if (!strncmp (q, "arm", 3))
30579 opts->x_target_flags &= ~MASK_THUMB;
30580
30581 else if (!strncmp (q, "fpu=", 4))
30582 {
30583 int fpu_index;
30584 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30585 &fpu_index, CL_TARGET))
30586 {
30587 error ("invalid fpu for attribute(target(\"%s\"))", q);
30588 return false;
30589 }
30590 if (fpu_index == TARGET_FPU_auto)
30591 {
30592 /* This doesn't really make sense until we support
30593 general dynamic selection of the architecture and all
30594 sub-features. */
30595 sorry ("auto fpu selection not currently permitted here");
30596 return false;
30597 }
30598 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30599 }
30600 else
30601 {
30602 error ("attribute(target(\"%s\")) is unknown", q);
30603 return false;
30604 }
30605 }
30606
30607 return true;
30608 }
30609
30610 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30611
30612 tree
30613 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30614 struct gcc_options *opts_set)
30615 {
30616 struct cl_target_option cl_opts;
30617
30618 if (!arm_valid_target_attribute_rec (args, opts))
30619 return NULL_TREE;
30620
30621 cl_target_option_save (&cl_opts, opts);
30622 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30623 arm_option_check_internal (opts);
30624 /* Do any overrides, such as arch=xxx from the global options. */
30625 arm_option_override_internal (opts, opts_set);
30626
30627 return build_target_option_node (opts);
30628 }
30629
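/* Add a "target" attribute with the single string argument MODE (e.g. "thumb"
   or "arm") to the attribute list *ATTRIBUTES.  Used by arm_insert_attributes
   below; the effect is as if the function had been declared with
   __attribute__ ((target ("thumb"))).  */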
30630 static void
30631 add_attribute (const char * mode, tree *attributes)
30632 {
30633 size_t len = strlen (mode);
30634 tree value = build_string (len, mode);
30635
30636 TREE_TYPE (value) = build_array_type (char_type_node,
30637 build_index_type (size_int (len)));
30638
30639 *attributes = tree_cons (get_identifier ("target"),
30640 build_tree_list (NULL_TREE, value),
30641 *attributes);
30642 }
30643
30644 /* For testing. Insert thumb or arm modes alternately on functions. */
30645
30646 static void
30647 arm_insert_attributes (tree fndecl, tree * attributes)
30648 {
30649 const char *mode;
30650
30651 if (! TARGET_FLIP_THUMB)
30652 return;
30653
30654 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
30655 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30656 return;
30657
30658 /* Nested definitions must inherit mode. */
30659 if (current_function_decl)
30660 {
30661 mode = TARGET_THUMB ? "thumb" : "arm";
30662 add_attribute (mode, attributes);
30663 return;
30664 }
30665
30666 /* If there is already a setting don't change it. */
30667 if (lookup_attribute ("target", *attributes) != NULL)
30668 return;
30669
30670 mode = thumb_flipper ? "thumb" : "arm";
30671 add_attribute (mode, attributes);
30672
30673 thumb_flipper = !thumb_flipper;
30674 }
30675
30676 /* Hook to validate attribute((target("string"))). */
30677
30678 static bool
30679 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30680 tree args, int ARG_UNUSED (flags))
30681 {
30682 bool ret = true;
30683 struct gcc_options func_options;
30684 tree cur_tree, new_optimize;
30685 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30686
30687 /* Get the optimization options of the current function. */
30688 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30689
30690 /* If the function changed the optimization levels as well as setting target
30691 options, start with the optimizations specified. */
30692 if (!func_optimize)
30693 func_optimize = optimization_default_node;
30694
30695 /* Init func_options. */
30696 memset (&func_options, 0, sizeof (func_options));
30697 init_options_struct (&func_options, NULL);
30698 lang_hooks.init_options_struct (&func_options);
30699
30700 /* Initialize func_options to the defaults. */
30701 cl_optimization_restore (&func_options,
30702 TREE_OPTIMIZATION (func_optimize));
30703
30704 cl_target_option_restore (&func_options,
30705 TREE_TARGET_OPTION (target_option_default_node));
30706
30707 /* Set func_options flags with new target mode. */
30708 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30709 &global_options_set);
30710
30711 if (cur_tree == NULL_TREE)
30712 ret = false;
30713
30714 new_optimize = build_optimization_node (&func_options);
30715
30716 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30717
30718 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30719
30720 finalize_options_struct (&func_options);
30721
30722 return ret;
30723 }
30724
30725 /* Match an ISA feature bitmap to a named FPU. We always use the
30726 first entry that exactly matches the feature set, so that we
30727 effectively canonicalize the FPU name for the assembler. */
30728 static const char*
30729 arm_identify_fpu_from_isa (sbitmap isa)
30730 {
30731 auto_sbitmap fpubits (isa_num_bits);
30732 auto_sbitmap cand_fpubits (isa_num_bits);
30733
30734 bitmap_and (fpubits, isa, isa_all_fpubits);
30735
30736 /* If there are no ISA feature bits relating to the FPU, we must be
30737 doing soft-float. */
30738 if (bitmap_empty_p (fpubits))
30739 return "softvfp";
30740
30741 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30742 {
30743 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30744 if (bitmap_equal_p (fpubits, cand_fpubits))
30745 return all_fpus[i].name;
30746 }
30747 /* We must find an entry, or things have gone wrong. */
30748 gcc_unreachable ();
30749 }
30750
30751 void
30752 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30753 {
30754
30755 fprintf (stream, "\t.syntax unified\n");
30756
30757 if (TARGET_THUMB)
30758 {
30759 if (is_called_in_ARM_mode (decl)
30760 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30761 && cfun->is_thunk))
30762 fprintf (stream, "\t.code 32\n");
30763 else if (TARGET_THUMB1)
30764 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30765 else
30766 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30767 }
30768 else
30769 fprintf (stream, "\t.arm\n");
30770
30771 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30772 (TARGET_SOFT_FLOAT
30773 ? "softvfp"
30774 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30775
30776 if (TARGET_POKE_FUNCTION_NAME)
30777 arm_poke_function_name (stream, (const char *) name);
30778 }
30779
30780 /* If MEM is in the form of [base+offset], extract the two parts of the
30781 address and store them in BASE and OFFSET; otherwise return false
30782 after clearing BASE and OFFSET. */
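/* For instance (illustrative RTL only), a memory operand whose address is
   (plus (reg r3) (const_int 8)) yields BASE = r3 and OFFSET = 8, while a
   plain (reg r3) yields BASE = r3 and OFFSET = 0; any other address form
   fails.  */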
30783
30784 static bool
30785 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30786 {
30787 rtx addr;
30788
30789 gcc_assert (MEM_P (mem));
30790
30791 addr = XEXP (mem, 0);
30792
30793 /* Strip off const from addresses like (const (addr)). */
30794 if (GET_CODE (addr) == CONST)
30795 addr = XEXP (addr, 0);
30796
30797 if (GET_CODE (addr) == REG)
30798 {
30799 *base = addr;
30800 *offset = const0_rtx;
30801 return true;
30802 }
30803
30804 if (GET_CODE (addr) == PLUS
30805 && GET_CODE (XEXP (addr, 0)) == REG
30806 && CONST_INT_P (XEXP (addr, 1)))
30807 {
30808 *base = XEXP (addr, 0);
30809 *offset = XEXP (addr, 1);
30810 return true;
30811 }
30812
30813 *base = NULL_RTX;
30814 *offset = NULL_RTX;
30815
30816 return false;
30817 }
30818
30819 /* If INSN is a load or store using an address of the form [base+offset],
30820 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
30821 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30822 otherwise return FALSE. */
30823
30824 static bool
30825 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30826 {
30827 rtx x, dest, src;
30828
30829 gcc_assert (INSN_P (insn));
30830 x = PATTERN (insn);
30831 if (GET_CODE (x) != SET)
30832 return false;
30833
30834 src = SET_SRC (x);
30835 dest = SET_DEST (x);
30836 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30837 {
30838 *is_load = false;
30839 extract_base_offset_in_addr (dest, base, offset);
30840 }
30841 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30842 {
30843 *is_load = true;
30844 extract_base_offset_in_addr (src, base, offset);
30845 }
30846 else
30847 return false;
30848
30849 return (*base != NULL_RTX && *offset != NULL_RTX);
30850 }
30851
30852 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30853
30854 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30855 and PRI are only calculated for these instructions. For other instructions,
30856 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30857 instruction fusion can be supported by returning different priorities.
30858
30859 It's important that irrelevant instructions get the largest FUSION_PRI. */
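/* As a rough illustration of the calculation below (not an exact trace):
   two loads from the same base register with offsets 0 and 4 receive the
   same FUSION_PRI, while their PRI values differ by the offset, so the
   scheduler keeps them adjacent and ordered by increasing offset, which is
   what allows them to be fused.  */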
30860
30861 static void
30862 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30863 int *fusion_pri, int *pri)
30864 {
30865 int tmp, off_val;
30866 bool is_load;
30867 rtx base, offset;
30868
30869 gcc_assert (INSN_P (insn));
30870
30871 tmp = max_pri - 1;
30872 if (!fusion_load_store (insn, &base, &offset, &is_load))
30873 {
30874 *pri = tmp;
30875 *fusion_pri = tmp;
30876 return;
30877 }
30878
30879 /* Load goes first. */
30880 if (is_load)
30881 *fusion_pri = tmp - 1;
30882 else
30883 *fusion_pri = tmp - 2;
30884
30885 tmp /= 2;
30886
30887 /* INSN with smaller base register goes first. */
30888 tmp -= ((REGNO (base) & 0xff) << 20);
30889
30890 /* INSN with smaller offset goes first. */
30891 off_val = (int)(INTVAL (offset));
30892 if (off_val >= 0)
30893 tmp -= (off_val & 0xfffff);
30894 else
30895 tmp += ((- off_val) & 0xfffff);
30896
30897 *pri = tmp;
30898 return;
30899 }
30900
30901
30902 /* Construct and return a PARALLEL RTX vector with elements numbering the
30903 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30904 the vector - from the perspective of the architecture. This does not
30905 line up with GCC's perspective on lane numbers, so we end up with
30906 different masks depending on our target endianness. The diagram
30907 below may help. We must draw the distinction when building masks
30908 which select one half of the vector. An instruction selecting
30909 architectural low-lanes for a big-endian target must be described using
30910 a mask selecting GCC high-lanes.
30911
30912                Big-Endian              Little-Endian
30913
30914    GCC           0   1   2   3          3   2   1   0
30915                | x | x | x | x |      | x | x | x | x |
30916    Architecture  3   2   1   0          3   2   1   0
30917
30918    Low Mask:       { 2, 3 }                { 0, 1 }
30919    High Mask:      { 0, 1 }                { 2, 3 }
30920 */
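/* Concretely, for a V4SImode vector this function returns (illustrative
   RTL):

     little-endian, HIGH == true:  (parallel [(const_int 2) (const_int 3)])
     big-endian,    HIGH == true:  (parallel [(const_int 0) (const_int 1)])

   matching the "High Mask" row of the diagram above.  */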
30921
30922 rtx
30923 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30924 {
30925 int nunits = GET_MODE_NUNITS (mode);
30926 rtvec v = rtvec_alloc (nunits / 2);
30927 int high_base = nunits / 2;
30928 int low_base = 0;
30929 int base;
30930 rtx t1;
30931 int i;
30932
30933 if (BYTES_BIG_ENDIAN)
30934 base = high ? low_base : high_base;
30935 else
30936 base = high ? high_base : low_base;
30937
30938 for (i = 0; i < nunits / 2; i++)
30939 RTVEC_ELT (v, i) = GEN_INT (base + i);
30940
30941 t1 = gen_rtx_PARALLEL (mode, v);
30942 return t1;
30943 }
30944
30945 /* Check OP for validity as a PARALLEL RTX vector with elements
30946 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30947 from the perspective of the architecture. See the diagram above
30948 arm_simd_vect_par_cnst_half for more details. */
30949
30950 bool
30951 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30952 bool high)
30953 {
30954 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30955 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30956 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30957 int i = 0;
30958
30959 if (!VECTOR_MODE_P (mode))
30960 return false;
30961
30962 if (count_op != count_ideal)
30963 return false;
30964
30965 for (i = 0; i < count_ideal; i++)
30966 {
30967 rtx elt_op = XVECEXP (op, 0, i);
30968 rtx elt_ideal = XVECEXP (ideal, 0, i);
30969
30970 if (!CONST_INT_P (elt_op)
30971 || INTVAL (elt_ideal) != INTVAL (elt_op))
30972 return false;
30973 }
30974 return true;
30975 }
30976
30977 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
30978 in Thumb1. */
30979 static bool
30980 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30981 const_tree)
30982 {
30983 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30984 if (vcall_offset && TARGET_THUMB1)
30985 return false;
30986
30987 /* Otherwise ok. */
30988 return true;
30989 }
30990
30991 /* Generate RTL for a conditional branch with rtx comparison CODE in
30992 mode CC_MODE. The destination of the unlikely conditional branch
30993 is LABEL_REF. */
30994
30995 void
30996 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30997 rtx label_ref)
30998 {
30999 rtx x;
31000 x = gen_rtx_fmt_ee (code, VOIDmode,
31001 gen_rtx_REG (cc_mode, CC_REGNUM),
31002 const0_rtx);
31003
31004 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31005 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31006 pc_rtx);
31007 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31008 }
31009
31010 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31011
31012 For pure-code sections there is no letter code for this attribute, so
31013 output all the section flags numerically when this is needed. */
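/* The numeric values written below appear to correspond to the ELF section
   header flags: 0x1 SHF_WRITE, 0x2 SHF_ALLOC, 0x4 SHF_EXECINSTR,
   0x10 SHF_MERGE, 0x20 SHF_STRINGS, 0x200 SHF_GROUP, 0x400 SHF_TLS,
   0x80000000 SHF_EXCLUDE and 0x20000000 SHF_ARM_PURECODE.  */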
31014
31015 static bool
31016 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31017 {
31018
31019 if (flags & SECTION_ARM_PURECODE)
31020 {
31021 *num = 0x20000000;
31022
31023 if (!(flags & SECTION_DEBUG))
31024 *num |= 0x2;
31025 if (flags & SECTION_EXCLUDE)
31026 *num |= 0x80000000;
31027 if (flags & SECTION_WRITE)
31028 *num |= 0x1;
31029 if (flags & SECTION_CODE)
31030 *num |= 0x4;
31031 if (flags & SECTION_MERGE)
31032 *num |= 0x10;
31033 if (flags & SECTION_STRINGS)
31034 *num |= 0x20;
31035 if (flags & SECTION_TLS)
31036 *num |= 0x400;
31037 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31038 *num |= 0x200;
31039
31040 return true;
31041 }
31042
31043 return false;
31044 }
31045
31046 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31047
31048 If pure-code is passed as an option, make sure all functions are in
31049 sections that have the SHF_ARM_PURECODE attribute. */
31050
31051 static section *
31052 arm_function_section (tree decl, enum node_frequency freq,
31053 bool startup, bool exit)
31054 {
31055 const char * section_name;
31056 section * sec;
31057
31058 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31059 return default_function_section (decl, freq, startup, exit);
31060
31061 if (!target_pure_code)
31062 return default_function_section (decl, freq, startup, exit);
31063
31064
31065 section_name = DECL_SECTION_NAME (decl);
31066
31067 /* If a function is not in a named section then it falls under the 'default'
31068 text section, also known as '.text'. We can preserve previous behavior as
31069 the default text section already has the SHF_ARM_PURECODE section
31070 attribute. */
31071 if (!section_name)
31072 {
31073 section *default_sec = default_function_section (decl, freq, startup,
31074 exit);
31075
31076 /* If default_sec is not null, then it must be a special section like for
31077 example .text.startup. We set the pure-code attribute and return the
31078 same section to preserve existing behavior. */
31079 if (default_sec)
31080 default_sec->common.flags |= SECTION_ARM_PURECODE;
31081 return default_sec;
31082 }
31083
31084 /* Otherwise look whether a section has already been created with
31085 'section_name'. */
31086 sec = get_named_section (decl, section_name, 0);
31087 if (!sec)
31088 /* If that is not the case, passing NULL as the section's name to
31089 'get_named_section' will create a section with the declaration's
31090 section name. */
31091 sec = get_named_section (decl, NULL, 0);
31092
31093 /* Set the SHF_ARM_PURECODE attribute. */
31094 sec->common.flags |= SECTION_ARM_PURECODE;
31095
31096 return sec;
31097 }
31098
31099 /* Implements the TARGET_SECTION_FLAGS hook.
31100
31101 If DECL is a function declaration and pure-code is passed as an option
31102 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31103 section's name and RELOC indicates whether the declaration's initializer may
31104 contain runtime relocations. */
31105
31106 static unsigned int
31107 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31108 {
31109 unsigned int flags = default_section_type_flags (decl, name, reloc);
31110
31111 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31112 flags |= SECTION_ARM_PURECODE;
31113
31114 return flags;
31115 }
31116
31117 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
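/* Per the ARM run-time ABI these helpers return the quotient and remainder as
   a value pair (for SImode, quotient in r0 and remainder in r1), so the result
   is modelled below as a single double-width integer value from which the two
   halves are extracted with subregs.  */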
31118
31119 static void
31120 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31121 rtx op0, rtx op1,
31122 rtx *quot_p, rtx *rem_p)
31123 {
31124 if (mode == SImode)
31125 gcc_assert (!TARGET_IDIV);
31126
31127 scalar_int_mode libval_mode
31128 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31129
31130 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31131 libval_mode,
31132 op0, GET_MODE (op0),
31133 op1, GET_MODE (op1));
31134
31135 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31136 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31137 GET_MODE_SIZE (mode));
31138
31139 gcc_assert (quotient);
31140 gcc_assert (remainder);
31141
31142 *quot_p = quotient;
31143 *rem_p = remainder;
31144 }
31145
31146 /* This function checks for the availability of the coprocessor builtin passed
31147 in BUILTIN for the current target. Returns true if it is available and
31148 false otherwise. If a BUILTIN is passed for which this function has not
31149 been implemented, it will trigger an internal compiler error (gcc_unreachable). */
31150
31151 bool
31152 arm_coproc_builtin_available (enum unspecv builtin)
31153 {
31154 /* None of these builtins are available in Thumb mode if the target only
31155 supports Thumb-1. */
31156 if (TARGET_THUMB1)
31157 return false;
31158
31159 switch (builtin)
31160 {
31161 case VUNSPEC_CDP:
31162 case VUNSPEC_LDC:
31163 case VUNSPEC_LDCL:
31164 case VUNSPEC_STC:
31165 case VUNSPEC_STCL:
31166 case VUNSPEC_MCR:
31167 case VUNSPEC_MRC:
31168 if (arm_arch4)
31169 return true;
31170 break;
31171 case VUNSPEC_CDP2:
31172 case VUNSPEC_LDC2:
31173 case VUNSPEC_LDC2L:
31174 case VUNSPEC_STC2:
31175 case VUNSPEC_STC2L:
31176 case VUNSPEC_MCR2:
31177 case VUNSPEC_MRC2:
31178 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31179 ARMv8-{A,M}. */
31180 if (arm_arch5)
31181 return true;
31182 break;
31183 case VUNSPEC_MCRR:
31184 case VUNSPEC_MRRC:
31185 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31186 ARMv8-{A,M}. */
31187 if (arm_arch6 || arm_arch5te)
31188 return true;
31189 break;
31190 case VUNSPEC_MCRR2:
31191 case VUNSPEC_MRRC2:
31192 if (arm_arch6)
31193 return true;
31194 break;
31195 default:
31196 gcc_unreachable ();
31197 }
31198 return false;
31199 }
31200
31201 /* This function returns true if OP is a valid memory operand for the ldc and
31202 stc coprocessor instructions and false otherwise. */
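/* For example (illustrative addresses only): [r0], [r2, #-1020] and [r3, #512]
   are accepted, while [r2, #2] is rejected because the offset is not a
   multiple of 4, and [r2, #1024] is rejected because it is out of range.  */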
31203
31204 bool
31205 arm_coproc_ldc_stc_legitimate_address (rtx op)
31206 {
31207 HOST_WIDE_INT range;
31208 /* Has to be a memory operand. */
31209 if (!MEM_P (op))
31210 return false;
31211
31212 op = XEXP (op, 0);
31213
31214 /* We accept registers. */
31215 if (REG_P (op))
31216 return true;
31217
31218 switch (GET_CODE (op))
31219 {
31220 case PLUS:
31221 {
31222 /* Or registers with an offset. */
31223 if (!REG_P (XEXP (op, 0)))
31224 return false;
31225
31226 op = XEXP (op, 1);
31227
31228 /* The offset must be an immediate though. */
31229 if (!CONST_INT_P (op))
31230 return false;
31231
31232 range = INTVAL (op);
31233
31234 /* Within the range of [-1020,1020]. */
31235 if (!IN_RANGE (range, -1020, 1020))
31236 return false;
31237
31238 /* And a multiple of 4. */
31239 return (range % 4) == 0;
31240 }
31241 case PRE_INC:
31242 case POST_INC:
31243 case PRE_DEC:
31244 case POST_DEC:
31245 return REG_P (XEXP (op, 0));
31246 default:
31247 gcc_unreachable ();
31248 }
31249 return false;
31250 }
31251
31252 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31253
31254 In VFPv1, VFP registers could only be accessed in the mode they were
31255 set, so subregs would be invalid there. However, we don't support
31256 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31257
31258 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31259 VFP registers in little-endian order. We can't describe that accurately to
31260 GCC, so avoid taking subregs of such values.
31261
31262 The only exception is going from a 128-bit to a 64-bit type. In that
31263 case the data layout happens to be consistent for big-endian, so we
31264 explicitly allow that case. */
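/* For instance, on a big-endian target a subreg that reinterprets a DFmode
   value held in a VFP register as SImode words is rejected here, whereas the
   128-bit to 64-bit case mentioned above (e.g. V2DImode to DImode) is still
   allowed.  */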
31265
31266 static bool
31267 arm_can_change_mode_class (machine_mode from, machine_mode to,
31268 reg_class_t rclass)
31269 {
31270 if (TARGET_BIG_END
31271 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31272 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31273 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31274 && reg_classes_intersect_p (VFP_REGS, rclass))
31275 return false;
31276 return true;
31277 }
31278
31279 #if CHECKING_P
31280 namespace selftest {
31281
31282 /* Scan the static data tables generated by parsecpu.awk looking for
31283 potential issues with the data. We primarily check for
31284 inconsistencies in the option extensions at present (extensions
31285 that duplicate others but aren't marked as aliases). Furthermore,
31286 for correct canonicalization, later options must never be a subset
31287 of an earlier option. Any extension should also only specify other
31288 feature bits and never an architecture bit. The architecture is inferred
31289 from the declaration of the extension. */
31290 static void
31291 arm_test_cpu_arch_data (void)
31292 {
31293 const arch_option *arch;
31294 const cpu_option *cpu;
31295 auto_sbitmap target_isa (isa_num_bits);
31296 auto_sbitmap isa1 (isa_num_bits);
31297 auto_sbitmap isa2 (isa_num_bits);
31298
31299 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31300 {
31301 const cpu_arch_extension *ext1, *ext2;
31302
31303 if (arch->common.extensions == NULL)
31304 continue;
31305
31306 arm_initialize_isa (target_isa, arch->common.isa_bits);
31307
31308 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31309 {
31310 if (ext1->alias)
31311 continue;
31312
31313 arm_initialize_isa (isa1, ext1->isa_bits);
31314 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31315 {
31316 if (ext2->alias || ext1->remove != ext2->remove)
31317 continue;
31318
31319 arm_initialize_isa (isa2, ext2->isa_bits);
31320 /* If the option is a subset of the parent option, it doesn't
31321 add anything and so isn't useful. */
31322 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31323
31324 /* If the extension specifies any architectural bits then
31325 disallow it. Extensions should only specify feature bits. */
31326 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31327 }
31328 }
31329 }
31330
31331 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31332 {
31333 const cpu_arch_extension *ext1, *ext2;
31334
31335 if (cpu->common.extensions == NULL)
31336 continue;
31337
31338 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31339
31340 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31341 {
31342 if (ext1->alias)
31343 continue;
31344
31345 arm_initialize_isa (isa1, ext1->isa_bits);
31346 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31347 {
31348 if (ext2->alias || ext1->remove != ext2->remove)
31349 continue;
31350
31351 arm_initialize_isa (isa2, ext2->isa_bits);
31352 /* If the option is a subset of the parent option, it doesn't
31353 add anything and so isn't useful. */
31354 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31355
31356 /* If the extension specifies any architectural bits then
31357 disallow it. Extensions should only specify feature bits. */
31358 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31359 }
31360 }
31361 }
31362 }
31363
31364 static void
31365 arm_run_selftests (void)
31366 {
31367 arm_test_cpu_arch_data ();
31368 }
31369 } /* Namespace selftest. */
31370
31371 #undef TARGET_RUN_TARGET_SELFTESTS
31372 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31373 #endif /* CHECKING_P */
31374
31375 struct gcc_target targetm = TARGET_INITIALIZER;
31376
31377 #include "gt-arm.h"