1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
67 #include "gimple.h"
68
69 /* This file should be included last. */
70 #include "target-def.h"
71
72 /* Forward definitions of types. */
73 typedef struct minipool_node Mnode;
74 typedef struct minipool_fixup Mfix;
75
76 void (*arm_lang_output_object_attributes_hook)(void);
77
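/* Holds the sequence of up to four immediate values returned by
   optimal_immediate_sequence when synthesizing a constant.  */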
78 struct four_ints
79 {
80 int i[4];
81 };
82
83 /* Forward function declarations. */
84 static bool arm_const_not_ok_for_debug_p (rtx);
85 static int arm_needs_doubleword_align (machine_mode, const_tree);
86 static int arm_compute_static_chain_stack_bytes (void);
87 static arm_stack_offsets *arm_get_frame_offsets (void);
88 static void arm_add_gc_roots (void);
89 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
90 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
91 static unsigned bit_count (unsigned long);
92 static unsigned bitmap_popcount (const sbitmap);
93 static int arm_address_register_rtx_p (rtx, int);
94 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
95 static bool is_called_in_ARM_mode (tree);
96 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
97 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
98 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
99 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
100 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
101 inline static int thumb1_index_register_rtx_p (rtx, int);
102 static int thumb_far_jump_used_p (void);
103 static bool thumb_force_lr_save (void);
104 static unsigned arm_size_return_regs (void);
105 static bool arm_assemble_integer (rtx, unsigned int, int);
106 static void arm_print_operand (FILE *, rtx, int);
107 static void arm_print_operand_address (FILE *, machine_mode, rtx);
108 static bool arm_print_operand_punct_valid_p (unsigned char code);
109 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
110 static arm_cc get_arm_condition_code (rtx);
111 static const char *output_multi_immediate (rtx *, const char *, const char *,
112 int, HOST_WIDE_INT);
113 static const char *shift_op (rtx, HOST_WIDE_INT *);
114 static struct machine_function *arm_init_machine_status (void);
115 static void thumb_exit (FILE *, int);
116 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
117 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
118 static Mnode *add_minipool_forward_ref (Mfix *);
119 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
120 static Mnode *add_minipool_backward_ref (Mfix *);
121 static void assign_minipool_offsets (Mfix *);
122 static void arm_print_value (FILE *, rtx);
123 static void dump_minipool (rtx_insn *);
124 static int arm_barrier_cost (rtx_insn *);
125 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
126 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
127 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
128 machine_mode, rtx);
129 static void arm_reorg (void);
130 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
131 static unsigned long arm_compute_save_reg0_reg12_mask (void);
132 static unsigned long arm_compute_save_reg_mask (void);
133 static unsigned long arm_isr_value (tree);
134 static unsigned long arm_compute_func_type (void);
135 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
136 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
137 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
138 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
139 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
140 #endif
141 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
142 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
143 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
144 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
145 static int arm_comp_type_attributes (const_tree, const_tree);
146 static void arm_set_default_type_attributes (tree);
147 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
148 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
149 static int optimal_immediate_sequence (enum rtx_code code,
150 unsigned HOST_WIDE_INT val,
151 struct four_ints *return_sequence);
152 static int optimal_immediate_sequence_1 (enum rtx_code code,
153 unsigned HOST_WIDE_INT val,
154 struct four_ints *return_sequence,
155 int i);
156 static int arm_get_strip_length (int);
157 static bool arm_function_ok_for_sibcall (tree, tree);
158 static machine_mode arm_promote_function_mode (const_tree,
159 machine_mode, int *,
160 const_tree, int);
161 static bool arm_return_in_memory (const_tree, const_tree);
162 static rtx arm_function_value (const_tree, const_tree, bool);
163 static rtx arm_libcall_value_1 (machine_mode);
164 static rtx arm_libcall_value (machine_mode, const_rtx);
165 static bool arm_function_value_regno_p (const unsigned int);
166 static void arm_internal_label (FILE *, const char *, unsigned long);
167 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
168 tree);
169 static bool arm_have_conditional_execution (void);
170 static bool arm_cannot_force_const_mem (machine_mode, rtx);
171 static bool arm_legitimate_constant_p (machine_mode, rtx);
172 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
173 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
174 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
175 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
176 static void emit_constant_insn (rtx cond, rtx pattern);
177 static rtx_insn *emit_set_insn (rtx, rtx);
178 static rtx emit_multi_reg_push (unsigned long, unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
180 tree, bool);
181 static rtx arm_function_arg (cumulative_args_t, machine_mode,
182 const_tree, bool);
183 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
184 const_tree, bool);
185 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
186 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
187 const_tree);
188 static rtx aapcs_libcall_value (machine_mode);
189 static int aapcs_select_return_coproc (const_tree, const_tree);
190
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
193 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
194 #endif
195 #ifndef ARM_PE
196 static void arm_encode_section_info (tree, rtx, int);
197 #endif
198
199 static void arm_file_end (void);
200 static void arm_file_start (void);
201 static void arm_insert_attributes (tree, tree *);
202
203 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
204 tree, int *, int);
205 static bool arm_pass_by_reference (cumulative_args_t,
206 machine_mode, const_tree, bool);
207 static bool arm_promote_prototypes (const_tree);
208 static bool arm_default_short_enums (void);
209 static bool arm_align_anon_bitfield (void);
210 static bool arm_return_in_msb (const_tree);
211 static bool arm_must_pass_in_stack (machine_mode, const_tree);
212 static bool arm_return_in_memory (const_tree, const_tree);
213 #if ARM_UNWIND_INFO
214 static void arm_unwind_emit (FILE *, rtx_insn *);
215 static bool arm_output_ttype (rtx);
216 static void arm_asm_emit_except_personality (rtx);
217 #endif
218 static void arm_asm_init_sections (void);
219 static rtx arm_dwarf_register_span (rtx);
220
221 static tree arm_cxx_guard_type (void);
222 static bool arm_cxx_guard_mask_bit (void);
223 static tree arm_get_cookie_size (tree);
224 static bool arm_cookie_has_size (void);
225 static bool arm_cxx_cdtor_returns_this (void);
226 static bool arm_cxx_key_method_may_be_inline (void);
227 static void arm_cxx_determine_class_data_visibility (tree);
228 static bool arm_cxx_class_data_always_comdat (void);
229 static bool arm_cxx_use_aeabi_atexit (void);
230 static void arm_init_libfuncs (void);
231 static tree arm_build_builtin_va_list (void);
232 static void arm_expand_builtin_va_start (tree, rtx);
233 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
234 static void arm_option_override (void);
235 static void arm_option_restore (struct gcc_options *,
236 struct cl_target_option *);
237 static void arm_override_options_after_change (void);
238 static void arm_option_print (FILE *, int, struct cl_target_option *);
239 static void arm_set_current_function (tree);
240 static bool arm_can_inline_p (tree, tree);
241 static void arm_relayout_function (tree);
242 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
243 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
244 static bool arm_sched_can_speculate_insn (rtx_insn *);
245 static bool arm_macro_fusion_p (void);
246 static bool arm_cannot_copy_insn_p (rtx_insn *);
247 static int arm_issue_rate (void);
248 static int arm_first_cycle_multipass_dfa_lookahead (void);
249 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
250 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
251 static bool arm_output_addr_const_extra (FILE *, rtx);
252 static bool arm_allocate_stack_slots_for_args (void);
253 static bool arm_warn_func_return (tree);
254 static tree arm_promoted_type (const_tree t);
255 static bool arm_scalar_mode_supported_p (machine_mode);
256 static bool arm_frame_pointer_required (void);
257 static bool arm_can_eliminate (const int, const int);
258 static void arm_asm_trampoline_template (FILE *);
259 static void arm_trampoline_init (rtx, tree, rtx);
260 static rtx arm_trampoline_adjust_address (rtx);
261 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
262 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
265 static bool arm_array_mode_supported_p (machine_mode,
266 unsigned HOST_WIDE_INT);
267 static machine_mode arm_preferred_simd_mode (machine_mode);
268 static bool arm_class_likely_spilled_p (reg_class_t);
269 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
270 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
271 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
272 const_tree type,
273 int misalignment,
274 bool is_packed);
275 static void arm_conditional_register_usage (void);
276 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
277 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
278 static unsigned int arm_autovectorize_vector_sizes (void);
279 static int arm_default_branch_cost (bool, bool);
280 static int arm_cortex_a5_branch_cost (bool, bool);
281 static int arm_cortex_m_branch_cost (bool, bool);
282 static int arm_cortex_m7_branch_cost (bool, bool);
283
284 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
285 const unsigned char *sel);
286
287 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
288
289 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
290 tree vectype,
291 int misalign ATTRIBUTE_UNUSED);
292 static unsigned arm_add_stmt_cost (void *data, int count,
293 enum vect_cost_for_stmt kind,
294 struct _stmt_vec_info *stmt_info,
295 int misalign,
296 enum vect_cost_model_location where);
297
298 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
299 bool op0_preserve_value);
300 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
301
302 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
303 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
304 const_tree);
305 static section *arm_function_section (tree, enum node_frequency, bool, bool);
306 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
307 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
308 int reloc);
309 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
310 static machine_mode arm_floatn_mode (int, bool);
311 \f
312 /* Table of machine attributes. */
313 static const struct attribute_spec arm_attribute_table[] =
314 {
315 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
316 affects_type_identity } */
317 /* Function calls made to this symbol must be done indirectly, because
318 it may lie outside of the 26 bit addressing range of a normal function
319 call. */
320 { "long_call", 0, 0, false, true, true, NULL, false },
321 /* Whereas these functions are always known to reside within the 26 bit
322 addressing range. */
323 { "short_call", 0, 0, false, true, true, NULL, false },
324 /* Specify the procedure call conventions for a function. */
325 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
326 false },
327 /* Interrupt Service Routines have special prologue and epilogue requirements. */
328 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
329 false },
330 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
331 false },
332 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
333 false },
334 #ifdef ARM_PE
335 /* ARM/PE has three new attributes:
336 interfacearm - ?
337 dllexport - for exporting a function/variable that will live in a dll
338 dllimport - for importing a function/variable from a dll
339
340 Microsoft allows multiple declspecs in one __declspec, separating
341 them with spaces. We do NOT support this. Instead, use __declspec
342 multiple times.
343 */
344 { "dllimport", 0, 0, true, false, false, NULL, false },
345 { "dllexport", 0, 0, true, false, false, NULL, false },
346 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
347 false },
348 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
349 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
350 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
351 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
352 false },
353 #endif
354 /* ARMv8-M Security Extensions support. */
355 { "cmse_nonsecure_entry", 0, 0, true, false, false,
356 arm_handle_cmse_nonsecure_entry, false },
357 { "cmse_nonsecure_call", 0, 0, true, false, false,
358 arm_handle_cmse_nonsecure_call, true },
359 { NULL, 0, 0, false, false, false, NULL, false }
360 };
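/* For reference, user code applies these attributes roughly as follows
   (illustrative examples only; see the GCC manual for the full details):

     void far_away (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void gateway (void) __attribute__ ((cmse_nonsecure_entry));  */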
361 \f
362 /* Initialize the GCC target structure. */
363 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
364 #undef TARGET_MERGE_DECL_ATTRIBUTES
365 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
366 #endif
367
368 #undef TARGET_LEGITIMIZE_ADDRESS
369 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
370
371 #undef TARGET_ATTRIBUTE_TABLE
372 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373
374 #undef TARGET_INSERT_ATTRIBUTES
375 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
376
377 #undef TARGET_ASM_FILE_START
378 #define TARGET_ASM_FILE_START arm_file_start
379 #undef TARGET_ASM_FILE_END
380 #define TARGET_ASM_FILE_END arm_file_end
381
382 #undef TARGET_ASM_ALIGNED_SI_OP
383 #define TARGET_ASM_ALIGNED_SI_OP NULL
384 #undef TARGET_ASM_INTEGER
385 #define TARGET_ASM_INTEGER arm_assemble_integer
386
387 #undef TARGET_PRINT_OPERAND
388 #define TARGET_PRINT_OPERAND arm_print_operand
389 #undef TARGET_PRINT_OPERAND_ADDRESS
390 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
391 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
392 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
393
394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
396
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
399
400 #undef TARGET_ASM_FUNCTION_EPILOGUE
401 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
402
403 #undef TARGET_CAN_INLINE_P
404 #define TARGET_CAN_INLINE_P arm_can_inline_p
405
406 #undef TARGET_RELAYOUT_FUNCTION
407 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
408
409 #undef TARGET_OPTION_OVERRIDE
410 #define TARGET_OPTION_OVERRIDE arm_option_override
411
412 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
413 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
414
415 #undef TARGET_OPTION_RESTORE
416 #define TARGET_OPTION_RESTORE arm_option_restore
417
418 #undef TARGET_OPTION_PRINT
419 #define TARGET_OPTION_PRINT arm_option_print
420
421 #undef TARGET_COMP_TYPE_ATTRIBUTES
422 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
423
424 #undef TARGET_SCHED_CAN_SPECULATE_INSN
425 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
426
427 #undef TARGET_SCHED_MACRO_FUSION_P
428 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
429
430 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
431 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
432
433 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
434 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
435
436 #undef TARGET_SCHED_ADJUST_COST
437 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
438
439 #undef TARGET_SET_CURRENT_FUNCTION
440 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
441
442 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
443 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
444
445 #undef TARGET_SCHED_REORDER
446 #define TARGET_SCHED_REORDER arm_sched_reorder
447
448 #undef TARGET_REGISTER_MOVE_COST
449 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
450
451 #undef TARGET_MEMORY_MOVE_COST
452 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
453
454 #undef TARGET_ENCODE_SECTION_INFO
455 #ifdef ARM_PE
456 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
457 #else
458 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
459 #endif
460
461 #undef TARGET_STRIP_NAME_ENCODING
462 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
463
464 #undef TARGET_ASM_INTERNAL_LABEL
465 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
466
467 #undef TARGET_FLOATN_MODE
468 #define TARGET_FLOATN_MODE arm_floatn_mode
469
470 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
471 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
472
473 #undef TARGET_FUNCTION_VALUE
474 #define TARGET_FUNCTION_VALUE arm_function_value
475
476 #undef TARGET_LIBCALL_VALUE
477 #define TARGET_LIBCALL_VALUE arm_libcall_value
478
479 #undef TARGET_FUNCTION_VALUE_REGNO_P
480 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
481
482 #undef TARGET_ASM_OUTPUT_MI_THUNK
483 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
484 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
485 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
486
487 #undef TARGET_RTX_COSTS
488 #define TARGET_RTX_COSTS arm_rtx_costs
489 #undef TARGET_ADDRESS_COST
490 #define TARGET_ADDRESS_COST arm_address_cost
491
492 #undef TARGET_SHIFT_TRUNCATION_MASK
493 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
494 #undef TARGET_VECTOR_MODE_SUPPORTED_P
495 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
496 #undef TARGET_ARRAY_MODE_SUPPORTED_P
497 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
498 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
499 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
500 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
501 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
502 arm_autovectorize_vector_sizes
503
504 #undef TARGET_MACHINE_DEPENDENT_REORG
505 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
506
507 #undef TARGET_INIT_BUILTINS
508 #define TARGET_INIT_BUILTINS arm_init_builtins
509 #undef TARGET_EXPAND_BUILTIN
510 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
511 #undef TARGET_BUILTIN_DECL
512 #define TARGET_BUILTIN_DECL arm_builtin_decl
513
514 #undef TARGET_INIT_LIBFUNCS
515 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
516
517 #undef TARGET_PROMOTE_FUNCTION_MODE
518 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
519 #undef TARGET_PROMOTE_PROTOTYPES
520 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
521 #undef TARGET_PASS_BY_REFERENCE
522 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
523 #undef TARGET_ARG_PARTIAL_BYTES
524 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
525 #undef TARGET_FUNCTION_ARG
526 #define TARGET_FUNCTION_ARG arm_function_arg
527 #undef TARGET_FUNCTION_ARG_ADVANCE
528 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
529 #undef TARGET_FUNCTION_ARG_BOUNDARY
530 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
531
532 #undef TARGET_SETUP_INCOMING_VARARGS
533 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
534
535 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
536 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
537
538 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
539 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
540 #undef TARGET_TRAMPOLINE_INIT
541 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
542 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
543 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
544
545 #undef TARGET_WARN_FUNC_RETURN
546 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
547
548 #undef TARGET_DEFAULT_SHORT_ENUMS
549 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
550
551 #undef TARGET_ALIGN_ANON_BITFIELD
552 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
553
554 #undef TARGET_NARROW_VOLATILE_BITFIELD
555 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
556
557 #undef TARGET_CXX_GUARD_TYPE
558 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
559
560 #undef TARGET_CXX_GUARD_MASK_BIT
561 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
562
563 #undef TARGET_CXX_GET_COOKIE_SIZE
564 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
565
566 #undef TARGET_CXX_COOKIE_HAS_SIZE
567 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
568
569 #undef TARGET_CXX_CDTOR_RETURNS_THIS
570 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
571
572 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
573 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
574
575 #undef TARGET_CXX_USE_AEABI_ATEXIT
576 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
577
578 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
579 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
580 arm_cxx_determine_class_data_visibility
581
582 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
583 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
584
585 #undef TARGET_RETURN_IN_MSB
586 #define TARGET_RETURN_IN_MSB arm_return_in_msb
587
588 #undef TARGET_RETURN_IN_MEMORY
589 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
590
591 #undef TARGET_MUST_PASS_IN_STACK
592 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
593
594 #if ARM_UNWIND_INFO
595 #undef TARGET_ASM_UNWIND_EMIT
596 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
597
598 /* EABI unwinding tables use a different format for the typeinfo tables. */
599 #undef TARGET_ASM_TTYPE
600 #define TARGET_ASM_TTYPE arm_output_ttype
601
602 #undef TARGET_ARM_EABI_UNWINDER
603 #define TARGET_ARM_EABI_UNWINDER true
604
605 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
606 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
607
608 #endif /* ARM_UNWIND_INFO */
609
610 #undef TARGET_ASM_INIT_SECTIONS
611 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
612
613 #undef TARGET_DWARF_REGISTER_SPAN
614 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
615
616 #undef TARGET_CANNOT_COPY_INSN_P
617 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
618
619 #ifdef HAVE_AS_TLS
620 #undef TARGET_HAVE_TLS
621 #define TARGET_HAVE_TLS true
622 #endif
623
624 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
625 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
626
627 #undef TARGET_LEGITIMATE_CONSTANT_P
628 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
629
630 #undef TARGET_CANNOT_FORCE_CONST_MEM
631 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
632
633 #undef TARGET_MAX_ANCHOR_OFFSET
634 #define TARGET_MAX_ANCHOR_OFFSET 4095
635
636 /* The minimum is set such that the total size of the block
637 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
638 divisible by eight, ensuring natural spacing of anchors. */
639 #undef TARGET_MIN_ANCHOR_OFFSET
640 #define TARGET_MIN_ANCHOR_OFFSET -4088
641
642 #undef TARGET_SCHED_ISSUE_RATE
643 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
644
645 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
646 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
647 arm_first_cycle_multipass_dfa_lookahead
648
649 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
650 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
651 arm_first_cycle_multipass_dfa_lookahead_guard
652
653 #undef TARGET_MANGLE_TYPE
654 #define TARGET_MANGLE_TYPE arm_mangle_type
655
656 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
657 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
658
659 #undef TARGET_BUILD_BUILTIN_VA_LIST
660 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
661 #undef TARGET_EXPAND_BUILTIN_VA_START
662 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
663 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
664 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
665
666 #ifdef HAVE_AS_TLS
667 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
668 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
669 #endif
670
671 #undef TARGET_LEGITIMATE_ADDRESS_P
672 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
673
674 #undef TARGET_PREFERRED_RELOAD_CLASS
675 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
676
677 #undef TARGET_PROMOTED_TYPE
678 #define TARGET_PROMOTED_TYPE arm_promoted_type
679
680 #undef TARGET_SCALAR_MODE_SUPPORTED_P
681 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
682
683 #undef TARGET_FRAME_POINTER_REQUIRED
684 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
685
686 #undef TARGET_CAN_ELIMINATE
687 #define TARGET_CAN_ELIMINATE arm_can_eliminate
688
689 #undef TARGET_CONDITIONAL_REGISTER_USAGE
690 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
691
692 #undef TARGET_CLASS_LIKELY_SPILLED_P
693 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
694
695 #undef TARGET_VECTORIZE_BUILTINS
696 #define TARGET_VECTORIZE_BUILTINS
697
698 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
699 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
700 arm_builtin_vectorized_function
701
702 #undef TARGET_VECTOR_ALIGNMENT
703 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
704
705 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
706 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
707 arm_vector_alignment_reachable
708
709 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
710 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
711 arm_builtin_support_vector_misalignment
712
713 #undef TARGET_PREFERRED_RENAME_CLASS
714 #define TARGET_PREFERRED_RENAME_CLASS \
715 arm_preferred_rename_class
716
717 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
718 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
719 arm_vectorize_vec_perm_const_ok
720
721 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
722 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
723 arm_builtin_vectorization_cost
724 #undef TARGET_VECTORIZE_ADD_STMT_COST
725 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
726
727 #undef TARGET_CANONICALIZE_COMPARISON
728 #define TARGET_CANONICALIZE_COMPARISON \
729 arm_canonicalize_comparison
730
731 #undef TARGET_ASAN_SHADOW_OFFSET
732 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
733
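/* When -mrestrict-it is in effect (the default when compiling for ARMv8-A
   in Thumb state), an IT block may cover only a single conditional
   instruction; otherwise one IT instruction may predicate up to four.  */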
734 #undef MAX_INSN_PER_IT_BLOCK
735 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
736
737 #undef TARGET_CAN_USE_DOLOOP_P
738 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
739
740 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
741 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
742
743 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
744 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
745
746 #undef TARGET_SCHED_FUSION_PRIORITY
747 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
748
749 #undef TARGET_ASM_FUNCTION_SECTION
750 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
751
752 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
753 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
754
755 #undef TARGET_SECTION_TYPE_FLAGS
756 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
757
758 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
759 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
760
761 #undef TARGET_C_EXCESS_PRECISION
762 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
763
764 /* Although the architecture reserves bits 0 and 1, only the former is
765 used for ARM/Thumb ISA selection in v7 and earlier versions. */
766 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
767 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
768
769 struct gcc_target targetm = TARGET_INITIALIZER;
770 \f
771 /* Obstack for minipool constant handling. */
772 static struct obstack minipool_obstack;
773 static char * minipool_startobj;
774
775 /* The maximum number of insns skipped which
776 will be conditionalised if possible. */
777 static int max_insns_skipped = 5;
778
779 extern FILE * asm_out_file;
780
781 /* True if we are currently building a constant table. */
782 int making_const_table;
783
784 /* The processor for which instructions should be scheduled. */
785 enum processor_type arm_tune = TARGET_CPU_arm_none;
786
787 /* The current tuning set. */
788 const struct tune_params *current_tune;
789
790 /* Which floating point hardware to schedule for. */
791 int arm_fpu_attr;
792
793 /* Used for Thumb call_via trampolines. */
794 rtx thumb_call_via_label[14];
795 static int thumb_call_reg_needed;
796
797 /* The bits in this mask specify which instruction scheduling options should
798 be used. */
799 unsigned int tune_flags = 0;
800
801 /* The highest ARM architecture version supported by the
802 target. */
803 enum base_architecture arm_base_arch = BASE_ARCH_0;
804
805 /* Active target architecture and tuning. */
806
807 struct arm_build_target arm_active_target;
808
809 /* The following are used in the arm.md file as equivalents to bits
810 in the above two flag variables. */
811
812 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
813 int arm_arch3m = 0;
814
815 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
816 int arm_arch4 = 0;
817
818 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
819 int arm_arch4t = 0;
820
821 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
822 int arm_arch5 = 0;
823
824 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
825 int arm_arch5e = 0;
826
827 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
828 int arm_arch5te = 0;
829
830 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
831 int arm_arch6 = 0;
832
833 /* Nonzero if this chip supports the ARM 6K extensions. */
834 int arm_arch6k = 0;
835
836 /* Nonzero if this chip supports the ARM 6KZ extensions. */
837 int arm_arch6kz = 0;
838
839 /* Nonzero if instructions present in ARMv6-M can be used. */
840 int arm_arch6m = 0;
841
842 /* Nonzero if this chip supports the ARM 7 extensions. */
843 int arm_arch7 = 0;
844
845 /* Nonzero if this chip supports the Large Physical Address Extension. */
846 int arm_arch_lpae = 0;
847
848 /* Nonzero if instructions not present in the 'M' profile can be used. */
849 int arm_arch_notm = 0;
850
851 /* Nonzero if instructions present in ARMv7E-M can be used. */
852 int arm_arch7em = 0;
853
854 /* Nonzero if instructions present in ARMv8 can be used. */
855 int arm_arch8 = 0;
856
857 /* Nonzero if this chip supports the ARMv8.1 extensions. */
858 int arm_arch8_1 = 0;
859
860 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
861 int arm_arch8_2 = 0;
862
863 /* Nonzero if this chip supports the FP16 instructions extension of ARM
864 Architecture 8.2. */
865 int arm_fp16_inst = 0;
866
867 /* Nonzero if this chip can benefit from load scheduling. */
868 int arm_ld_sched = 0;
869
870 /* Nonzero if this chip is a StrongARM. */
871 int arm_tune_strongarm = 0;
872
873 /* Nonzero if this chip supports Intel Wireless MMX technology. */
874 int arm_arch_iwmmxt = 0;
875
876 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
877 int arm_arch_iwmmxt2 = 0;
878
879 /* Nonzero if this chip is an XScale. */
880 int arm_arch_xscale = 0;
881
882 /* Nonzero if tuning for XScale. */
883 int arm_tune_xscale = 0;
884
885 /* Nonzero if we want to tune for stores that access the write-buffer.
886 This typically means an ARM6 or ARM7 with MMU or MPU. */
887 int arm_tune_wbuf = 0;
888
889 /* Nonzero if tuning for Cortex-A9. */
890 int arm_tune_cortex_a9 = 0;
891
892 /* Nonzero if we should define __THUMB_INTERWORK__ in the
893 preprocessor.
894 XXX This is a bit of a hack; it's intended to help work around
895 problems in GLD, which doesn't understand that armv5t code is
896 interworking clean. */
897 int arm_cpp_interwork = 0;
898
899 /* Nonzero if chip supports Thumb 1. */
900 int arm_arch_thumb1;
901
902 /* Nonzero if chip supports Thumb 2. */
903 int arm_arch_thumb2;
904
905 /* Nonzero if chip supports integer division instruction. */
906 int arm_arch_arm_hwdiv;
907 int arm_arch_thumb_hwdiv;
908
909 /* Nonzero if chip disallows volatile memory access in IT block. */
910 int arm_arch_no_volatile_ce;
911
912 /* Nonzero if we should use Neon to handle 64-bit operations rather
913 than core registers. */
914 int prefer_neon_for_64bits = 0;
915
916 /* Nonzero if we shouldn't use literal pools. */
917 bool arm_disable_literal_pool = false;
918
919 /* The register number to be used for the PIC offset register. */
920 unsigned arm_pic_register = INVALID_REGNUM;
921
922 enum arm_pcs arm_pcs_default;
923
924 /* For an explanation of these variables, see final_prescan_insn below. */
925 int arm_ccfsm_state;
926 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
927 enum arm_cond_code arm_current_cc;
928
929 rtx arm_target_insn;
930 int arm_target_label;
931 /* The number of conditionally executed insns, including the current insn. */
932 int arm_condexec_count = 0;
933 /* A bitmask specifying the patterns for the IT block.
934 Zero means do not output an IT block before this insn. */
935 int arm_condexec_mask = 0;
936 /* The number of bits used in arm_condexec_mask. */
937 int arm_condexec_masklen = 0;
938
939 /* Nonzero if chip supports the ARMv8 CRC instructions. */
940 int arm_arch_crc = 0;
941
942 /* Nonzero if chip supports the ARMv8-M security extensions. */
943 int arm_arch_cmse = 0;
944
945 /* Nonzero if the core has a very small, high-latency multiply unit. */
946 int arm_m_profile_small_mul = 0;
947
948 /* The condition codes of the ARM, and the inverse function. */
949 static const char * const arm_condition_codes[] =
950 {
951 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
952 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
953 };
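/* Each code and its logical inverse occupy adjacent slots, so the inverse of
   the condition at index I is found at index I ^ 1 (see
   ARM_INVERSE_CONDITION_CODE in arm.h).  */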
954
955 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
956 int arm_regs_in_sequence[] =
957 {
958 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
959 };
960
961 #define ARM_LSL_NAME "lsl"
962 #define streq(string1, string2) (strcmp (string1, string2) == 0)
963
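/* The low registers (r0-r7) available as Thumb-2 work registers: the hard
   frame pointer, stack pointer, program counter and PIC register are
   excluded from the mask.  */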
964 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
965 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
966 | (1 << PIC_OFFSET_TABLE_REGNUM)))
967 \f
968 /* Initialization code. */
969
970 struct processors
971 {
972 const char *const name;
973 enum processor_type core;
974 unsigned int tune_flags;
975 const char *arch;
976 enum base_architecture base_arch;
977 enum isa_feature isa_bits[isa_num_bits];
978 const struct tune_params *const tune;
979 };
980
981
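/* Initializers for the prefetch-related tuning fields: the number of
   prefetch slots, the L1 cache size and the L1 cache line size.  A value of
   -1 means the parameter is unknown; ARM_PREFETCH_NOT_BENEFICIAL is used for
   cores where software prefetching is not considered worthwhile.  */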
982 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
983 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
984 { \
985 num_slots, \
986 l1_size, \
987 l1_line_size \
988 }
989
990 /* arm generic vectorizer costs. */
991 static const
992 struct cpu_vec_costs arm_default_vec_cost = {
993 1, /* scalar_stmt_cost. */
994 1, /* scalar load_cost. */
995 1, /* scalar_store_cost. */
996 1, /* vec_stmt_cost. */
997 1, /* vec_to_scalar_cost. */
998 1, /* scalar_to_vec_cost. */
999 1, /* vec_align_load_cost. */
1000 1, /* vec_unalign_load_cost. */
1001 1, /* vec_unalign_store_cost. */
1002 1, /* vec_store_cost. */
1003 3, /* cond_taken_branch_cost. */
1004 1, /* cond_not_taken_branch_cost. */
1005 };
1006
1007 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1008 #include "aarch-cost-tables.h"
1009
1010
1011
1012 const struct cpu_cost_table cortexa9_extra_costs =
1013 {
1014 /* ALU */
1015 {
1016 0, /* arith. */
1017 0, /* logical. */
1018 0, /* shift. */
1019 COSTS_N_INSNS (1), /* shift_reg. */
1020 COSTS_N_INSNS (1), /* arith_shift. */
1021 COSTS_N_INSNS (2), /* arith_shift_reg. */
1022 0, /* log_shift. */
1023 COSTS_N_INSNS (1), /* log_shift_reg. */
1024 COSTS_N_INSNS (1), /* extend. */
1025 COSTS_N_INSNS (2), /* extend_arith. */
1026 COSTS_N_INSNS (1), /* bfi. */
1027 COSTS_N_INSNS (1), /* bfx. */
1028 0, /* clz. */
1029 0, /* rev. */
1030 0, /* non_exec. */
1031 true /* non_exec_costs_exec. */
1032 },
1033 {
1034 /* MULT SImode */
1035 {
1036 COSTS_N_INSNS (3), /* simple. */
1037 COSTS_N_INSNS (3), /* flag_setting. */
1038 COSTS_N_INSNS (2), /* extend. */
1039 COSTS_N_INSNS (3), /* add. */
1040 COSTS_N_INSNS (2), /* extend_add. */
1041 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1042 },
1043 /* MULT DImode */
1044 {
1045 0, /* simple (N/A). */
1046 0, /* flag_setting (N/A). */
1047 COSTS_N_INSNS (4), /* extend. */
1048 0, /* add (N/A). */
1049 COSTS_N_INSNS (4), /* extend_add. */
1050 0 /* idiv (N/A). */
1051 }
1052 },
1053 /* LD/ST */
1054 {
1055 COSTS_N_INSNS (2), /* load. */
1056 COSTS_N_INSNS (2), /* load_sign_extend. */
1057 COSTS_N_INSNS (2), /* ldrd. */
1058 COSTS_N_INSNS (2), /* ldm_1st. */
1059 1, /* ldm_regs_per_insn_1st. */
1060 2, /* ldm_regs_per_insn_subsequent. */
1061 COSTS_N_INSNS (5), /* loadf. */
1062 COSTS_N_INSNS (5), /* loadd. */
1063 COSTS_N_INSNS (1), /* load_unaligned. */
1064 COSTS_N_INSNS (2), /* store. */
1065 COSTS_N_INSNS (2), /* strd. */
1066 COSTS_N_INSNS (2), /* stm_1st. */
1067 1, /* stm_regs_per_insn_1st. */
1068 2, /* stm_regs_per_insn_subsequent. */
1069 COSTS_N_INSNS (1), /* storef. */
1070 COSTS_N_INSNS (1), /* stored. */
1071 COSTS_N_INSNS (1), /* store_unaligned. */
1072 COSTS_N_INSNS (1), /* loadv. */
1073 COSTS_N_INSNS (1) /* storev. */
1074 },
1075 {
1076 /* FP SFmode */
1077 {
1078 COSTS_N_INSNS (14), /* div. */
1079 COSTS_N_INSNS (4), /* mult. */
1080 COSTS_N_INSNS (7), /* mult_addsub. */
1081 COSTS_N_INSNS (30), /* fma. */
1082 COSTS_N_INSNS (3), /* addsub. */
1083 COSTS_N_INSNS (1), /* fpconst. */
1084 COSTS_N_INSNS (1), /* neg. */
1085 COSTS_N_INSNS (3), /* compare. */
1086 COSTS_N_INSNS (3), /* widen. */
1087 COSTS_N_INSNS (3), /* narrow. */
1088 COSTS_N_INSNS (3), /* toint. */
1089 COSTS_N_INSNS (3), /* fromint. */
1090 COSTS_N_INSNS (3) /* roundint. */
1091 },
1092 /* FP DFmode */
1093 {
1094 COSTS_N_INSNS (24), /* div. */
1095 COSTS_N_INSNS (5), /* mult. */
1096 COSTS_N_INSNS (8), /* mult_addsub. */
1097 COSTS_N_INSNS (30), /* fma. */
1098 COSTS_N_INSNS (3), /* addsub. */
1099 COSTS_N_INSNS (1), /* fpconst. */
1100 COSTS_N_INSNS (1), /* neg. */
1101 COSTS_N_INSNS (3), /* compare. */
1102 COSTS_N_INSNS (3), /* widen. */
1103 COSTS_N_INSNS (3), /* narrow. */
1104 COSTS_N_INSNS (3), /* toint. */
1105 COSTS_N_INSNS (3), /* fromint. */
1106 COSTS_N_INSNS (3) /* roundint. */
1107 }
1108 },
1109 /* Vector */
1110 {
1111 COSTS_N_INSNS (1) /* alu. */
1112 }
1113 };
1114
1115 const struct cpu_cost_table cortexa8_extra_costs =
1116 {
1117 /* ALU */
1118 {
1119 0, /* arith. */
1120 0, /* logical. */
1121 COSTS_N_INSNS (1), /* shift. */
1122 0, /* shift_reg. */
1123 COSTS_N_INSNS (1), /* arith_shift. */
1124 0, /* arith_shift_reg. */
1125 COSTS_N_INSNS (1), /* log_shift. */
1126 0, /* log_shift_reg. */
1127 0, /* extend. */
1128 0, /* extend_arith. */
1129 0, /* bfi. */
1130 0, /* bfx. */
1131 0, /* clz. */
1132 0, /* rev. */
1133 0, /* non_exec. */
1134 true /* non_exec_costs_exec. */
1135 },
1136 {
1137 /* MULT SImode */
1138 {
1139 COSTS_N_INSNS (1), /* simple. */
1140 COSTS_N_INSNS (1), /* flag_setting. */
1141 COSTS_N_INSNS (1), /* extend. */
1142 COSTS_N_INSNS (1), /* add. */
1143 COSTS_N_INSNS (1), /* extend_add. */
1144 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1145 },
1146 /* MULT DImode */
1147 {
1148 0, /* simple (N/A). */
1149 0, /* flag_setting (N/A). */
1150 COSTS_N_INSNS (2), /* extend. */
1151 0, /* add (N/A). */
1152 COSTS_N_INSNS (2), /* extend_add. */
1153 0 /* idiv (N/A). */
1154 }
1155 },
1156 /* LD/ST */
1157 {
1158 COSTS_N_INSNS (1), /* load. */
1159 COSTS_N_INSNS (1), /* load_sign_extend. */
1160 COSTS_N_INSNS (1), /* ldrd. */
1161 COSTS_N_INSNS (1), /* ldm_1st. */
1162 1, /* ldm_regs_per_insn_1st. */
1163 2, /* ldm_regs_per_insn_subsequent. */
1164 COSTS_N_INSNS (1), /* loadf. */
1165 COSTS_N_INSNS (1), /* loadd. */
1166 COSTS_N_INSNS (1), /* load_unaligned. */
1167 COSTS_N_INSNS (1), /* store. */
1168 COSTS_N_INSNS (1), /* strd. */
1169 COSTS_N_INSNS (1), /* stm_1st. */
1170 1, /* stm_regs_per_insn_1st. */
1171 2, /* stm_regs_per_insn_subsequent. */
1172 COSTS_N_INSNS (1), /* storef. */
1173 COSTS_N_INSNS (1), /* stored. */
1174 COSTS_N_INSNS (1), /* store_unaligned. */
1175 COSTS_N_INSNS (1), /* loadv. */
1176 COSTS_N_INSNS (1) /* storev. */
1177 },
1178 {
1179 /* FP SFmode */
1180 {
1181 COSTS_N_INSNS (36), /* div. */
1182 COSTS_N_INSNS (11), /* mult. */
1183 COSTS_N_INSNS (20), /* mult_addsub. */
1184 COSTS_N_INSNS (30), /* fma. */
1185 COSTS_N_INSNS (9), /* addsub. */
1186 COSTS_N_INSNS (3), /* fpconst. */
1187 COSTS_N_INSNS (3), /* neg. */
1188 COSTS_N_INSNS (6), /* compare. */
1189 COSTS_N_INSNS (4), /* widen. */
1190 COSTS_N_INSNS (4), /* narrow. */
1191 COSTS_N_INSNS (8), /* toint. */
1192 COSTS_N_INSNS (8), /* fromint. */
1193 COSTS_N_INSNS (8) /* roundint. */
1194 },
1195 /* FP DFmode */
1196 {
1197 COSTS_N_INSNS (64), /* div. */
1198 COSTS_N_INSNS (16), /* mult. */
1199 COSTS_N_INSNS (25), /* mult_addsub. */
1200 COSTS_N_INSNS (30), /* fma. */
1201 COSTS_N_INSNS (9), /* addsub. */
1202 COSTS_N_INSNS (3), /* fpconst. */
1203 COSTS_N_INSNS (3), /* neg. */
1204 COSTS_N_INSNS (6), /* compare. */
1205 COSTS_N_INSNS (6), /* widen. */
1206 COSTS_N_INSNS (6), /* narrow. */
1207 COSTS_N_INSNS (8), /* toint. */
1208 COSTS_N_INSNS (8), /* fromint. */
1209 COSTS_N_INSNS (8) /* roundint. */
1210 }
1211 },
1212 /* Vector */
1213 {
1214 COSTS_N_INSNS (1) /* alu. */
1215 }
1216 };
1217
1218 const struct cpu_cost_table cortexa5_extra_costs =
1219 {
1220 /* ALU */
1221 {
1222 0, /* arith. */
1223 0, /* logical. */
1224 COSTS_N_INSNS (1), /* shift. */
1225 COSTS_N_INSNS (1), /* shift_reg. */
1226 COSTS_N_INSNS (1), /* arith_shift. */
1227 COSTS_N_INSNS (1), /* arith_shift_reg. */
1228 COSTS_N_INSNS (1), /* log_shift. */
1229 COSTS_N_INSNS (1), /* log_shift_reg. */
1230 COSTS_N_INSNS (1), /* extend. */
1231 COSTS_N_INSNS (1), /* extend_arith. */
1232 COSTS_N_INSNS (1), /* bfi. */
1233 COSTS_N_INSNS (1), /* bfx. */
1234 COSTS_N_INSNS (1), /* clz. */
1235 COSTS_N_INSNS (1), /* rev. */
1236 0, /* non_exec. */
1237 true /* non_exec_costs_exec. */
1238 },
1239
1240 {
1241 /* MULT SImode */
1242 {
1243 0, /* simple. */
1244 COSTS_N_INSNS (1), /* flag_setting. */
1245 COSTS_N_INSNS (1), /* extend. */
1246 COSTS_N_INSNS (1), /* add. */
1247 COSTS_N_INSNS (1), /* extend_add. */
1248 COSTS_N_INSNS (7) /* idiv. */
1249 },
1250 /* MULT DImode */
1251 {
1252 0, /* simple (N/A). */
1253 0, /* flag_setting (N/A). */
1254 COSTS_N_INSNS (1), /* extend. */
1255 0, /* add. */
1256 COSTS_N_INSNS (2), /* extend_add. */
1257 0 /* idiv (N/A). */
1258 }
1259 },
1260 /* LD/ST */
1261 {
1262 COSTS_N_INSNS (1), /* load. */
1263 COSTS_N_INSNS (1), /* load_sign_extend. */
1264 COSTS_N_INSNS (6), /* ldrd. */
1265 COSTS_N_INSNS (1), /* ldm_1st. */
1266 1, /* ldm_regs_per_insn_1st. */
1267 2, /* ldm_regs_per_insn_subsequent. */
1268 COSTS_N_INSNS (2), /* loadf. */
1269 COSTS_N_INSNS (4), /* loadd. */
1270 COSTS_N_INSNS (1), /* load_unaligned. */
1271 COSTS_N_INSNS (1), /* store. */
1272 COSTS_N_INSNS (3), /* strd. */
1273 COSTS_N_INSNS (1), /* stm_1st. */
1274 1, /* stm_regs_per_insn_1st. */
1275 2, /* stm_regs_per_insn_subsequent. */
1276 COSTS_N_INSNS (2), /* storef. */
1277 COSTS_N_INSNS (2), /* stored. */
1278 COSTS_N_INSNS (1), /* store_unaligned. */
1279 COSTS_N_INSNS (1), /* loadv. */
1280 COSTS_N_INSNS (1) /* storev. */
1281 },
1282 {
1283 /* FP SFmode */
1284 {
1285 COSTS_N_INSNS (15), /* div. */
1286 COSTS_N_INSNS (3), /* mult. */
1287 COSTS_N_INSNS (7), /* mult_addsub. */
1288 COSTS_N_INSNS (7), /* fma. */
1289 COSTS_N_INSNS (3), /* addsub. */
1290 COSTS_N_INSNS (3), /* fpconst. */
1291 COSTS_N_INSNS (3), /* neg. */
1292 COSTS_N_INSNS (3), /* compare. */
1293 COSTS_N_INSNS (3), /* widen. */
1294 COSTS_N_INSNS (3), /* narrow. */
1295 COSTS_N_INSNS (3), /* toint. */
1296 COSTS_N_INSNS (3), /* fromint. */
1297 COSTS_N_INSNS (3) /* roundint. */
1298 },
1299 /* FP DFmode */
1300 {
1301 COSTS_N_INSNS (30), /* div. */
1302 COSTS_N_INSNS (6), /* mult. */
1303 COSTS_N_INSNS (10), /* mult_addsub. */
1304 COSTS_N_INSNS (7), /* fma. */
1305 COSTS_N_INSNS (3), /* addsub. */
1306 COSTS_N_INSNS (3), /* fpconst. */
1307 COSTS_N_INSNS (3), /* neg. */
1308 COSTS_N_INSNS (3), /* compare. */
1309 COSTS_N_INSNS (3), /* widen. */
1310 COSTS_N_INSNS (3), /* narrow. */
1311 COSTS_N_INSNS (3), /* toint. */
1312 COSTS_N_INSNS (3), /* fromint. */
1313 COSTS_N_INSNS (3) /* roundint. */
1314 }
1315 },
1316 /* Vector */
1317 {
1318 COSTS_N_INSNS (1) /* alu. */
1319 }
1320 };
1321
1322
1323 const struct cpu_cost_table cortexa7_extra_costs =
1324 {
1325 /* ALU */
1326 {
1327 0, /* arith. */
1328 0, /* logical. */
1329 COSTS_N_INSNS (1), /* shift. */
1330 COSTS_N_INSNS (1), /* shift_reg. */
1331 COSTS_N_INSNS (1), /* arith_shift. */
1332 COSTS_N_INSNS (1), /* arith_shift_reg. */
1333 COSTS_N_INSNS (1), /* log_shift. */
1334 COSTS_N_INSNS (1), /* log_shift_reg. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* extend_arith. */
1337 COSTS_N_INSNS (1), /* bfi. */
1338 COSTS_N_INSNS (1), /* bfx. */
1339 COSTS_N_INSNS (1), /* clz. */
1340 COSTS_N_INSNS (1), /* rev. */
1341 0, /* non_exec. */
1342 true /* non_exec_costs_exec. */
1343 },
1344
1345 {
1346 /* MULT SImode */
1347 {
1348 0, /* simple. */
1349 COSTS_N_INSNS (1), /* flag_setting. */
1350 COSTS_N_INSNS (1), /* extend. */
1351 COSTS_N_INSNS (1), /* add. */
1352 COSTS_N_INSNS (1), /* extend_add. */
1353 COSTS_N_INSNS (7) /* idiv. */
1354 },
1355 /* MULT DImode */
1356 {
1357 0, /* simple (N/A). */
1358 0, /* flag_setting (N/A). */
1359 COSTS_N_INSNS (1), /* extend. */
1360 0, /* add. */
1361 COSTS_N_INSNS (2), /* extend_add. */
1362 0 /* idiv (N/A). */
1363 }
1364 },
1365 /* LD/ST */
1366 {
1367 COSTS_N_INSNS (1), /* load. */
1368 COSTS_N_INSNS (1), /* load_sign_extend. */
1369 COSTS_N_INSNS (3), /* ldrd. */
1370 COSTS_N_INSNS (1), /* ldm_1st. */
1371 1, /* ldm_regs_per_insn_1st. */
1372 2, /* ldm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* loadf. */
1374 COSTS_N_INSNS (2), /* loadd. */
1375 COSTS_N_INSNS (1), /* load_unaligned. */
1376 COSTS_N_INSNS (1), /* store. */
1377 COSTS_N_INSNS (3), /* strd. */
1378 COSTS_N_INSNS (1), /* stm_1st. */
1379 1, /* stm_regs_per_insn_1st. */
1380 2, /* stm_regs_per_insn_subsequent. */
1381 COSTS_N_INSNS (2), /* storef. */
1382 COSTS_N_INSNS (2), /* stored. */
1383 COSTS_N_INSNS (1), /* store_unaligned. */
1384 COSTS_N_INSNS (1), /* loadv. */
1385 COSTS_N_INSNS (1) /* storev. */
1386 },
1387 {
1388 /* FP SFmode */
1389 {
1390 COSTS_N_INSNS (15), /* div. */
1391 COSTS_N_INSNS (3), /* mult. */
1392 COSTS_N_INSNS (7), /* mult_addsub. */
1393 COSTS_N_INSNS (7), /* fma. */
1394 COSTS_N_INSNS (3), /* addsub. */
1395 COSTS_N_INSNS (3), /* fpconst. */
1396 COSTS_N_INSNS (3), /* neg. */
1397 COSTS_N_INSNS (3), /* compare. */
1398 COSTS_N_INSNS (3), /* widen. */
1399 COSTS_N_INSNS (3), /* narrow. */
1400 COSTS_N_INSNS (3), /* toint. */
1401 COSTS_N_INSNS (3), /* fromint. */
1402 COSTS_N_INSNS (3) /* roundint. */
1403 },
1404 /* FP DFmode */
1405 {
1406 COSTS_N_INSNS (30), /* div. */
1407 COSTS_N_INSNS (6), /* mult. */
1408 COSTS_N_INSNS (10), /* mult_addsub. */
1409 COSTS_N_INSNS (7), /* fma. */
1410 COSTS_N_INSNS (3), /* addsub. */
1411 COSTS_N_INSNS (3), /* fpconst. */
1412 COSTS_N_INSNS (3), /* neg. */
1413 COSTS_N_INSNS (3), /* compare. */
1414 COSTS_N_INSNS (3), /* widen. */
1415 COSTS_N_INSNS (3), /* narrow. */
1416 COSTS_N_INSNS (3), /* toint. */
1417 COSTS_N_INSNS (3), /* fromint. */
1418 COSTS_N_INSNS (3) /* roundint. */
1419 }
1420 },
1421 /* Vector */
1422 {
1423 COSTS_N_INSNS (1) /* alu. */
1424 }
1425 };
1426
1427 const struct cpu_cost_table cortexa12_extra_costs =
1428 {
1429 /* ALU */
1430 {
1431 0, /* arith. */
1432 0, /* logical. */
1433 0, /* shift. */
1434 COSTS_N_INSNS (1), /* shift_reg. */
1435 COSTS_N_INSNS (1), /* arith_shift. */
1436 COSTS_N_INSNS (1), /* arith_shift_reg. */
1437 COSTS_N_INSNS (1), /* log_shift. */
1438 COSTS_N_INSNS (1), /* log_shift_reg. */
1439 0, /* extend. */
1440 COSTS_N_INSNS (1), /* extend_arith. */
1441 0, /* bfi. */
1442 COSTS_N_INSNS (1), /* bfx. */
1443 COSTS_N_INSNS (1), /* clz. */
1444 COSTS_N_INSNS (1), /* rev. */
1445 0, /* non_exec. */
1446 true /* non_exec_costs_exec. */
1447 },
1448 /* MULT SImode */
1449 {
1450 {
1451 COSTS_N_INSNS (2), /* simple. */
1452 COSTS_N_INSNS (3), /* flag_setting. */
1453 COSTS_N_INSNS (2), /* extend. */
1454 COSTS_N_INSNS (3), /* add. */
1455 COSTS_N_INSNS (2), /* extend_add. */
1456 COSTS_N_INSNS (18) /* idiv. */
1457 },
1458 /* MULT DImode */
1459 {
1460 0, /* simple (N/A). */
1461 0, /* flag_setting (N/A). */
1462 COSTS_N_INSNS (3), /* extend. */
1463 0, /* add (N/A). */
1464 COSTS_N_INSNS (3), /* extend_add. */
1465 0 /* idiv (N/A). */
1466 }
1467 },
1468 /* LD/ST */
1469 {
1470 COSTS_N_INSNS (3), /* load. */
1471 COSTS_N_INSNS (3), /* load_sign_extend. */
1472 COSTS_N_INSNS (3), /* ldrd. */
1473 COSTS_N_INSNS (3), /* ldm_1st. */
1474 1, /* ldm_regs_per_insn_1st. */
1475 2, /* ldm_regs_per_insn_subsequent. */
1476 COSTS_N_INSNS (3), /* loadf. */
1477 COSTS_N_INSNS (3), /* loadd. */
1478 0, /* load_unaligned. */
1479 0, /* store. */
1480 0, /* strd. */
1481 0, /* stm_1st. */
1482 1, /* stm_regs_per_insn_1st. */
1483 2, /* stm_regs_per_insn_subsequent. */
1484 COSTS_N_INSNS (2), /* storef. */
1485 COSTS_N_INSNS (2), /* stored. */
1486 0, /* store_unaligned. */
1487 COSTS_N_INSNS (1), /* loadv. */
1488 COSTS_N_INSNS (1) /* storev. */
1489 },
1490 {
1491 /* FP SFmode */
1492 {
1493 COSTS_N_INSNS (17), /* div. */
1494 COSTS_N_INSNS (4), /* mult. */
1495 COSTS_N_INSNS (8), /* mult_addsub. */
1496 COSTS_N_INSNS (8), /* fma. */
1497 COSTS_N_INSNS (4), /* addsub. */
1498 COSTS_N_INSNS (2), /* fpconst. */
1499 COSTS_N_INSNS (2), /* neg. */
1500 COSTS_N_INSNS (2), /* compare. */
1501 COSTS_N_INSNS (4), /* widen. */
1502 COSTS_N_INSNS (4), /* narrow. */
1503 COSTS_N_INSNS (4), /* toint. */
1504 COSTS_N_INSNS (4), /* fromint. */
1505 COSTS_N_INSNS (4) /* roundint. */
1506 },
1507 /* FP DFmode */
1508 {
1509 COSTS_N_INSNS (31), /* div. */
1510 COSTS_N_INSNS (4), /* mult. */
1511 COSTS_N_INSNS (8), /* mult_addsub. */
1512 COSTS_N_INSNS (8), /* fma. */
1513 COSTS_N_INSNS (4), /* addsub. */
1514 COSTS_N_INSNS (2), /* fpconst. */
1515 COSTS_N_INSNS (2), /* neg. */
1516 COSTS_N_INSNS (2), /* compare. */
1517 COSTS_N_INSNS (4), /* widen. */
1518 COSTS_N_INSNS (4), /* narrow. */
1519 COSTS_N_INSNS (4), /* toint. */
1520 COSTS_N_INSNS (4), /* fromint. */
1521 COSTS_N_INSNS (4) /* roundint. */
1522 }
1523 },
1524 /* Vector */
1525 {
1526 COSTS_N_INSNS (1) /* alu. */
1527 }
1528 };
1529
1530 const struct cpu_cost_table cortexa15_extra_costs =
1531 {
1532 /* ALU */
1533 {
1534 0, /* arith. */
1535 0, /* logical. */
1536 0, /* shift. */
1537 0, /* shift_reg. */
1538 COSTS_N_INSNS (1), /* arith_shift. */
1539 COSTS_N_INSNS (1), /* arith_shift_reg. */
1540 COSTS_N_INSNS (1), /* log_shift. */
1541 COSTS_N_INSNS (1), /* log_shift_reg. */
1542 0, /* extend. */
1543 COSTS_N_INSNS (1), /* extend_arith. */
1544 COSTS_N_INSNS (1), /* bfi. */
1545 0, /* bfx. */
1546 0, /* clz. */
1547 0, /* rev. */
1548 0, /* non_exec. */
1549 true /* non_exec_costs_exec. */
1550 },
1551 /* MULT SImode */
1552 {
1553 {
1554 COSTS_N_INSNS (2), /* simple. */
1555 COSTS_N_INSNS (3), /* flag_setting. */
1556 COSTS_N_INSNS (2), /* extend. */
1557 COSTS_N_INSNS (2), /* add. */
1558 COSTS_N_INSNS (2), /* extend_add. */
1559 COSTS_N_INSNS (18) /* idiv. */
1560 },
1561 /* MULT DImode */
1562 {
1563 0, /* simple (N/A). */
1564 0, /* flag_setting (N/A). */
1565 COSTS_N_INSNS (3), /* extend. */
1566 0, /* add (N/A). */
1567 COSTS_N_INSNS (3), /* extend_add. */
1568 0 /* idiv (N/A). */
1569 }
1570 },
1571 /* LD/ST */
1572 {
1573 COSTS_N_INSNS (3), /* load. */
1574 COSTS_N_INSNS (3), /* load_sign_extend. */
1575 COSTS_N_INSNS (3), /* ldrd. */
1576 COSTS_N_INSNS (4), /* ldm_1st. */
1577 1, /* ldm_regs_per_insn_1st. */
1578 2, /* ldm_regs_per_insn_subsequent. */
1579 COSTS_N_INSNS (4), /* loadf. */
1580 COSTS_N_INSNS (4), /* loadd. */
1581 0, /* load_unaligned. */
1582 0, /* store. */
1583 0, /* strd. */
1584 COSTS_N_INSNS (1), /* stm_1st. */
1585 1, /* stm_regs_per_insn_1st. */
1586 2, /* stm_regs_per_insn_subsequent. */
1587 0, /* storef. */
1588 0, /* stored. */
1589 0, /* store_unaligned. */
1590 COSTS_N_INSNS (1), /* loadv. */
1591 COSTS_N_INSNS (1) /* storev. */
1592 },
1593 {
1594 /* FP SFmode */
1595 {
1596 COSTS_N_INSNS (17), /* div. */
1597 COSTS_N_INSNS (4), /* mult. */
1598 COSTS_N_INSNS (8), /* mult_addsub. */
1599 COSTS_N_INSNS (8), /* fma. */
1600 COSTS_N_INSNS (4), /* addsub. */
1601 COSTS_N_INSNS (2), /* fpconst. */
1602 COSTS_N_INSNS (2), /* neg. */
1603 COSTS_N_INSNS (5), /* compare. */
1604 COSTS_N_INSNS (4), /* widen. */
1605 COSTS_N_INSNS (4), /* narrow. */
1606 COSTS_N_INSNS (4), /* toint. */
1607 COSTS_N_INSNS (4), /* fromint. */
1608 COSTS_N_INSNS (4) /* roundint. */
1609 },
1610 /* FP DFmode */
1611 {
1612 COSTS_N_INSNS (31), /* div. */
1613 COSTS_N_INSNS (4), /* mult. */
1614 COSTS_N_INSNS (8), /* mult_addsub. */
1615 COSTS_N_INSNS (8), /* fma. */
1616 COSTS_N_INSNS (4), /* addsub. */
1617 COSTS_N_INSNS (2), /* fpconst. */
1618 COSTS_N_INSNS (2), /* neg. */
1619 COSTS_N_INSNS (2), /* compare. */
1620 COSTS_N_INSNS (4), /* widen. */
1621 COSTS_N_INSNS (4), /* narrow. */
1622 COSTS_N_INSNS (4), /* toint. */
1623 COSTS_N_INSNS (4), /* fromint. */
1624 COSTS_N_INSNS (4) /* roundint. */
1625 }
1626 },
1627 /* Vector */
1628 {
1629 COSTS_N_INSNS (1) /* alu. */
1630 }
1631 };
1632
1633 const struct cpu_cost_table v7m_extra_costs =
1634 {
1635 /* ALU */
1636 {
1637 0, /* arith. */
1638 0, /* logical. */
1639 0, /* shift. */
1640 0, /* shift_reg. */
1641 0, /* arith_shift. */
1642 COSTS_N_INSNS (1), /* arith_shift_reg. */
1643 0, /* log_shift. */
1644 COSTS_N_INSNS (1), /* log_shift_reg. */
1645 0, /* extend. */
1646 COSTS_N_INSNS (1), /* extend_arith. */
1647 0, /* bfi. */
1648 0, /* bfx. */
1649 0, /* clz. */
1650 0, /* rev. */
1651 COSTS_N_INSNS (1), /* non_exec. */
1652 false /* non_exec_costs_exec. */
1653 },
1654 {
1655 /* MULT SImode */
1656 {
1657 COSTS_N_INSNS (1), /* simple. */
1658 COSTS_N_INSNS (1), /* flag_setting. */
1659 COSTS_N_INSNS (2), /* extend. */
1660 COSTS_N_INSNS (1), /* add. */
1661 COSTS_N_INSNS (3), /* extend_add. */
1662 COSTS_N_INSNS (8) /* idiv. */
1663 },
1664 /* MULT DImode */
1665 {
1666 0, /* simple (N/A). */
1667 0, /* flag_setting (N/A). */
1668 COSTS_N_INSNS (2), /* extend. */
1669 0, /* add (N/A). */
1670 COSTS_N_INSNS (3), /* extend_add. */
1671 0 /* idiv (N/A). */
1672 }
1673 },
1674 /* LD/ST */
1675 {
1676 COSTS_N_INSNS (2), /* load. */
1677 0, /* load_sign_extend. */
1678 COSTS_N_INSNS (3), /* ldrd. */
1679 COSTS_N_INSNS (2), /* ldm_1st. */
1680 1, /* ldm_regs_per_insn_1st. */
1681 1, /* ldm_regs_per_insn_subsequent. */
1682 COSTS_N_INSNS (2), /* loadf. */
1683 COSTS_N_INSNS (3), /* loadd. */
1684 COSTS_N_INSNS (1), /* load_unaligned. */
1685 COSTS_N_INSNS (2), /* store. */
1686 COSTS_N_INSNS (3), /* strd. */
1687 COSTS_N_INSNS (2), /* stm_1st. */
1688 1, /* stm_regs_per_insn_1st. */
1689 1, /* stm_regs_per_insn_subsequent. */
1690 COSTS_N_INSNS (2), /* storef. */
1691 COSTS_N_INSNS (3), /* stored. */
1692 COSTS_N_INSNS (1), /* store_unaligned. */
1693 COSTS_N_INSNS (1), /* loadv. */
1694 COSTS_N_INSNS (1) /* storev. */
1695 },
1696 {
1697 /* FP SFmode */
1698 {
1699 COSTS_N_INSNS (7), /* div. */
1700 COSTS_N_INSNS (2), /* mult. */
1701 COSTS_N_INSNS (5), /* mult_addsub. */
1702 COSTS_N_INSNS (3), /* fma. */
1703 COSTS_N_INSNS (1), /* addsub. */
1704 0, /* fpconst. */
1705 0, /* neg. */
1706 0, /* compare. */
1707 0, /* widen. */
1708 0, /* narrow. */
1709 0, /* toint. */
1710 0, /* fromint. */
1711 0 /* roundint. */
1712 },
1713 /* FP DFmode */
1714 {
1715 COSTS_N_INSNS (15), /* div. */
1716 COSTS_N_INSNS (5), /* mult. */
1717 COSTS_N_INSNS (7), /* mult_addsub. */
1718 COSTS_N_INSNS (7), /* fma. */
1719 COSTS_N_INSNS (3), /* addsub. */
1720 0, /* fpconst. */
1721 0, /* neg. */
1722 0, /* compare. */
1723 0, /* widen. */
1724 0, /* narrow. */
1725 0, /* toint. */
1726 0, /* fromint. */
1727 0 /* roundint. */
1728 }
1729 },
1730 /* Vector */
1731 {
1732 COSTS_N_INSNS (1) /* alu. */
1733 }
1734 };
1735
1736 const struct tune_params arm_slowmul_tune =
1737 {
1738 &generic_extra_costs, /* Insn extra costs. */
1739 NULL, /* Sched adj cost. */
1740 arm_default_branch_cost,
1741 &arm_default_vec_cost,
1742 3, /* Constant limit. */
1743 5, /* Max cond insns. */
1744 8, /* Memset max inline. */
1745 1, /* Issue rate. */
1746 ARM_PREFETCH_NOT_BENEFICIAL,
1747 tune_params::PREF_CONST_POOL_TRUE,
1748 tune_params::PREF_LDRD_FALSE,
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1750 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1751 tune_params::DISPARAGE_FLAGS_NEITHER,
1752 tune_params::PREF_NEON_64_FALSE,
1753 tune_params::PREF_NEON_STRINGOPS_FALSE,
1754 tune_params::FUSE_NOTHING,
1755 tune_params::SCHED_AUTOPREF_OFF
1756 };
1757
1758 const struct tune_params arm_fastmul_tune =
1759 {
1760 &generic_extra_costs, /* Insn extra costs. */
1761 NULL, /* Sched adj cost. */
1762 arm_default_branch_cost,
1763 &arm_default_vec_cost,
1764 1, /* Constant limit. */
1765 5, /* Max cond insns. */
1766 8, /* Memset max inline. */
1767 1, /* Issue rate. */
1768 ARM_PREFETCH_NOT_BENEFICIAL,
1769 tune_params::PREF_CONST_POOL_TRUE,
1770 tune_params::PREF_LDRD_FALSE,
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1772 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1773 tune_params::DISPARAGE_FLAGS_NEITHER,
1774 tune_params::PREF_NEON_64_FALSE,
1775 tune_params::PREF_NEON_STRINGOPS_FALSE,
1776 tune_params::FUSE_NOTHING,
1777 tune_params::SCHED_AUTOPREF_OFF
1778 };
1779
1780 /* StrongARM has early execution of branches, so a sequence that is worth
1781 skipping is shorter. Set max_insns_skipped to a lower value. */
1782
1783 const struct tune_params arm_strongarm_tune =
1784 {
1785 &generic_extra_costs, /* Insn extra costs. */
1786 NULL, /* Sched adj cost. */
1787 arm_default_branch_cost,
1788 &arm_default_vec_cost,
1789 1, /* Constant limit. */
1790 3, /* Max cond insns. */
1791 8, /* Memset max inline. */
1792 1, /* Issue rate. */
1793 ARM_PREFETCH_NOT_BENEFICIAL,
1794 tune_params::PREF_CONST_POOL_TRUE,
1795 tune_params::PREF_LDRD_FALSE,
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1797 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1798 tune_params::DISPARAGE_FLAGS_NEITHER,
1799 tune_params::PREF_NEON_64_FALSE,
1800 tune_params::PREF_NEON_STRINGOPS_FALSE,
1801 tune_params::FUSE_NOTHING,
1802 tune_params::SCHED_AUTOPREF_OFF
1803 };
1804
1805 const struct tune_params arm_xscale_tune =
1806 {
1807 &generic_extra_costs, /* Insn extra costs. */
1808 xscale_sched_adjust_cost,
1809 arm_default_branch_cost,
1810 &arm_default_vec_cost,
1811 2, /* Constant limit. */
1812 3, /* Max cond insns. */
1813 8, /* Memset max inline. */
1814 1, /* Issue rate. */
1815 ARM_PREFETCH_NOT_BENEFICIAL,
1816 tune_params::PREF_CONST_POOL_TRUE,
1817 tune_params::PREF_LDRD_FALSE,
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1819 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1820 tune_params::DISPARAGE_FLAGS_NEITHER,
1821 tune_params::PREF_NEON_64_FALSE,
1822 tune_params::PREF_NEON_STRINGOPS_FALSE,
1823 tune_params::FUSE_NOTHING,
1824 tune_params::SCHED_AUTOPREF_OFF
1825 };
1826
1827 const struct tune_params arm_9e_tune =
1828 {
1829 &generic_extra_costs, /* Insn extra costs. */
1830 NULL, /* Sched adj cost. */
1831 arm_default_branch_cost,
1832 &arm_default_vec_cost,
1833 1, /* Constant limit. */
1834 5, /* Max cond insns. */
1835 8, /* Memset max inline. */
1836 1, /* Issue rate. */
1837 ARM_PREFETCH_NOT_BENEFICIAL,
1838 tune_params::PREF_CONST_POOL_TRUE,
1839 tune_params::PREF_LDRD_FALSE,
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1841 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1842 tune_params::DISPARAGE_FLAGS_NEITHER,
1843 tune_params::PREF_NEON_64_FALSE,
1844 tune_params::PREF_NEON_STRINGOPS_FALSE,
1845 tune_params::FUSE_NOTHING,
1846 tune_params::SCHED_AUTOPREF_OFF
1847 };
1848
1849 const struct tune_params arm_marvell_pj4_tune =
1850 {
1851 &generic_extra_costs, /* Insn extra costs. */
1852 NULL, /* Sched adj cost. */
1853 arm_default_branch_cost,
1854 &arm_default_vec_cost,
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 8, /* Memset max inline. */
1858 2, /* Issue rate. */
1859 ARM_PREFETCH_NOT_BENEFICIAL,
1860 tune_params::PREF_CONST_POOL_TRUE,
1861 tune_params::PREF_LDRD_FALSE,
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1863 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1864 tune_params::DISPARAGE_FLAGS_NEITHER,
1865 tune_params::PREF_NEON_64_FALSE,
1866 tune_params::PREF_NEON_STRINGOPS_FALSE,
1867 tune_params::FUSE_NOTHING,
1868 tune_params::SCHED_AUTOPREF_OFF
1869 };
1870
1871 const struct tune_params arm_v6t2_tune =
1872 {
1873 &generic_extra_costs, /* Insn extra costs. */
1874 NULL, /* Sched adj cost. */
1875 arm_default_branch_cost,
1876 &arm_default_vec_cost,
1877 1, /* Constant limit. */
1878 5, /* Max cond insns. */
1879 8, /* Memset max inline. */
1880 1, /* Issue rate. */
1881 ARM_PREFETCH_NOT_BENEFICIAL,
1882 tune_params::PREF_CONST_POOL_FALSE,
1883 tune_params::PREF_LDRD_FALSE,
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1885 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1886 tune_params::DISPARAGE_FLAGS_NEITHER,
1887 tune_params::PREF_NEON_64_FALSE,
1888 tune_params::PREF_NEON_STRINGOPS_FALSE,
1889 tune_params::FUSE_NOTHING,
1890 tune_params::SCHED_AUTOPREF_OFF
1891 };
1892
1893
1894 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1895 const struct tune_params arm_cortex_tune =
1896 {
1897 &generic_extra_costs,
1898 NULL, /* Sched adj cost. */
1899 arm_default_branch_cost,
1900 &arm_default_vec_cost,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 2, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 tune_params::PREF_CONST_POOL_FALSE,
1907 tune_params::PREF_LDRD_FALSE,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER,
1911 tune_params::PREF_NEON_64_FALSE,
1912 tune_params::PREF_NEON_STRINGOPS_FALSE,
1913 tune_params::FUSE_NOTHING,
1914 tune_params::SCHED_AUTOPREF_OFF
1915 };
1916
1917 const struct tune_params arm_cortex_a8_tune =
1918 {
1919 &cortexa8_extra_costs,
1920 NULL, /* Sched adj cost. */
1921 arm_default_branch_cost,
1922 &arm_default_vec_cost,
1923 1, /* Constant limit. */
1924 5, /* Max cond insns. */
1925 8, /* Memset max inline. */
1926 2, /* Issue rate. */
1927 ARM_PREFETCH_NOT_BENEFICIAL,
1928 tune_params::PREF_CONST_POOL_FALSE,
1929 tune_params::PREF_LDRD_FALSE,
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1931 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1932 tune_params::DISPARAGE_FLAGS_NEITHER,
1933 tune_params::PREF_NEON_64_FALSE,
1934 tune_params::PREF_NEON_STRINGOPS_TRUE,
1935 tune_params::FUSE_NOTHING,
1936 tune_params::SCHED_AUTOPREF_OFF
1937 };
1938
1939 const struct tune_params arm_cortex_a7_tune =
1940 {
1941 &cortexa7_extra_costs,
1942 NULL, /* Sched adj cost. */
1943 arm_default_branch_cost,
1944 &arm_default_vec_cost,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 2, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL,
1950 tune_params::PREF_CONST_POOL_FALSE,
1951 tune_params::PREF_LDRD_FALSE,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER,
1955 tune_params::PREF_NEON_64_FALSE,
1956 tune_params::PREF_NEON_STRINGOPS_TRUE,
1957 tune_params::FUSE_NOTHING,
1958 tune_params::SCHED_AUTOPREF_OFF
1959 };
1960
1961 const struct tune_params arm_cortex_a15_tune =
1962 {
1963 &cortexa15_extra_costs,
1964 NULL, /* Sched adj cost. */
1965 arm_default_branch_cost,
1966 &arm_default_vec_cost,
1967 1, /* Constant limit. */
1968 2, /* Max cond insns. */
1969 8, /* Memset max inline. */
1970 3, /* Issue rate. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 tune_params::PREF_CONST_POOL_FALSE,
1973 tune_params::PREF_LDRD_TRUE,
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1975 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1976 tune_params::DISPARAGE_FLAGS_ALL,
1977 tune_params::PREF_NEON_64_FALSE,
1978 tune_params::PREF_NEON_STRINGOPS_TRUE,
1979 tune_params::FUSE_NOTHING,
1980 tune_params::SCHED_AUTOPREF_FULL
1981 };
1982
1983 const struct tune_params arm_cortex_a35_tune =
1984 {
1985 &cortexa53_extra_costs,
1986 NULL, /* Sched adj cost. */
1987 arm_default_branch_cost,
1988 &arm_default_vec_cost,
1989 1, /* Constant limit. */
1990 5, /* Max cond insns. */
1991 8, /* Memset max inline. */
1992 1, /* Issue rate. */
1993 ARM_PREFETCH_NOT_BENEFICIAL,
1994 tune_params::PREF_CONST_POOL_FALSE,
1995 tune_params::PREF_LDRD_FALSE,
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1997 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1998 tune_params::DISPARAGE_FLAGS_NEITHER,
1999 tune_params::PREF_NEON_64_FALSE,
2000 tune_params::PREF_NEON_STRINGOPS_TRUE,
2001 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2002 tune_params::SCHED_AUTOPREF_OFF
2003 };
2004
2005 const struct tune_params arm_cortex_a53_tune =
2006 {
2007 &cortexa53_extra_costs,
2008 NULL, /* Sched adj cost. */
2009 arm_default_branch_cost,
2010 &arm_default_vec_cost,
2011 1, /* Constant limit. */
2012 5, /* Max cond insns. */
2013 8, /* Memset max inline. */
2014 2, /* Issue rate. */
2015 ARM_PREFETCH_NOT_BENEFICIAL,
2016 tune_params::PREF_CONST_POOL_FALSE,
2017 tune_params::PREF_LDRD_FALSE,
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2019 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2020 tune_params::DISPARAGE_FLAGS_NEITHER,
2021 tune_params::PREF_NEON_64_FALSE,
2022 tune_params::PREF_NEON_STRINGOPS_TRUE,
2023 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2024 tune_params::SCHED_AUTOPREF_OFF
2025 };
2026
2027 const struct tune_params arm_cortex_a57_tune =
2028 {
2029 &cortexa57_extra_costs,
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_exynosm1_tune =
2050 {
2051 &exynosm1_extra_costs,
2052 NULL, /* Sched adj cost. */
2053 arm_default_branch_cost,
2054 &arm_default_vec_cost,
2055 1, /* Constant limit. */
2056 2, /* Max cond insns. */
2057 8, /* Memset max inline. */
2058 3, /* Issue rate. */
2059 ARM_PREFETCH_NOT_BENEFICIAL,
2060 tune_params::PREF_CONST_POOL_FALSE,
2061 tune_params::PREF_LDRD_TRUE,
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2064 tune_params::DISPARAGE_FLAGS_ALL,
2065 tune_params::PREF_NEON_64_FALSE,
2066 tune_params::PREF_NEON_STRINGOPS_TRUE,
2067 tune_params::FUSE_NOTHING,
2068 tune_params::SCHED_AUTOPREF_OFF
2069 };
2070
2071 const struct tune_params arm_xgene1_tune =
2072 {
2073 &xgene1_extra_costs,
2074 NULL, /* Sched adj cost. */
2075 arm_default_branch_cost,
2076 &arm_default_vec_cost,
2077 1, /* Constant limit. */
2078 2, /* Max cond insns. */
2079 32, /* Memset max inline. */
2080 4, /* Issue rate. */
2081 ARM_PREFETCH_NOT_BENEFICIAL,
2082 tune_params::PREF_CONST_POOL_FALSE,
2083 tune_params::PREF_LDRD_TRUE,
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2086 tune_params::DISPARAGE_FLAGS_ALL,
2087 tune_params::PREF_NEON_64_FALSE,
2088 tune_params::PREF_NEON_STRINGOPS_FALSE,
2089 tune_params::FUSE_NOTHING,
2090 tune_params::SCHED_AUTOPREF_OFF
2091 };
2092
2093 const struct tune_params arm_qdf24xx_tune =
2094 {
2095 &qdf24xx_extra_costs,
2096 NULL, /* Scheduler cost adjustment. */
2097 arm_default_branch_cost,
2098 &arm_default_vec_cost, /* Vectorizer costs. */
2099 1, /* Constant limit. */
2100 2, /* Max cond insns. */
2101 8, /* Memset max inline. */
2102 4, /* Issue rate. */
2103 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2104 tune_params::PREF_CONST_POOL_FALSE,
2105 tune_params::PREF_LDRD_TRUE,
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2108 tune_params::DISPARAGE_FLAGS_ALL,
2109 tune_params::PREF_NEON_64_FALSE,
2110 tune_params::PREF_NEON_STRINGOPS_TRUE,
2111 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2112 tune_params::SCHED_AUTOPREF_FULL
2113 };
2114
2115 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2116 less appealing. Set max_insns_skipped to a low value. */
2117
2118 const struct tune_params arm_cortex_a5_tune =
2119 {
2120 &cortexa5_extra_costs,
2121 NULL, /* Sched adj cost. */
2122 arm_cortex_a5_branch_cost,
2123 &arm_default_vec_cost,
2124 1, /* Constant limit. */
2125 1, /* Max cond insns. */
2126 8, /* Memset max inline. */
2127 2, /* Issue rate. */
2128 ARM_PREFETCH_NOT_BENEFICIAL,
2129 tune_params::PREF_CONST_POOL_FALSE,
2130 tune_params::PREF_LDRD_FALSE,
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2133 tune_params::DISPARAGE_FLAGS_NEITHER,
2134 tune_params::PREF_NEON_64_FALSE,
2135 tune_params::PREF_NEON_STRINGOPS_TRUE,
2136 tune_params::FUSE_NOTHING,
2137 tune_params::SCHED_AUTOPREF_OFF
2138 };
2139
2140 const struct tune_params arm_cortex_a9_tune =
2141 {
2142 &cortexa9_extra_costs,
2143 cortex_a9_sched_adjust_cost,
2144 arm_default_branch_cost,
2145 &arm_default_vec_cost,
2146 1, /* Constant limit. */
2147 5, /* Max cond insns. */
2148 8, /* Memset max inline. */
2149 2, /* Issue rate. */
2150 ARM_PREFETCH_BENEFICIAL (4, 32, 32),
2151 tune_params::PREF_CONST_POOL_FALSE,
2152 tune_params::PREF_LDRD_FALSE,
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2154 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2155 tune_params::DISPARAGE_FLAGS_NEITHER,
2156 tune_params::PREF_NEON_64_FALSE,
2157 tune_params::PREF_NEON_STRINGOPS_FALSE,
2158 tune_params::FUSE_NOTHING,
2159 tune_params::SCHED_AUTOPREF_OFF
2160 };
2161
2162 const struct tune_params arm_cortex_a12_tune =
2163 {
2164 &cortexa12_extra_costs,
2165 NULL, /* Sched adj cost. */
2166 arm_default_branch_cost,
2167 &arm_default_vec_cost, /* Vectorizer costs. */
2168 1, /* Constant limit. */
2169 2, /* Max cond insns. */
2170 8, /* Memset max inline. */
2171 2, /* Issue rate. */
2172 ARM_PREFETCH_NOT_BENEFICIAL,
2173 tune_params::PREF_CONST_POOL_FALSE,
2174 tune_params::PREF_LDRD_TRUE,
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2176 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2177 tune_params::DISPARAGE_FLAGS_ALL,
2178 tune_params::PREF_NEON_64_FALSE,
2179 tune_params::PREF_NEON_STRINGOPS_TRUE,
2180 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2181 tune_params::SCHED_AUTOPREF_OFF
2182 };
2183
2184 const struct tune_params arm_cortex_a73_tune =
2185 {
2186 &cortexa57_extra_costs,
2187 NULL, /* Sched adj cost. */
2188 arm_default_branch_cost,
2189 &arm_default_vec_cost, /* Vectorizer costs. */
2190 1, /* Constant limit. */
2191 2, /* Max cond insns. */
2192 8, /* Memset max inline. */
2193 2, /* Issue rate. */
2194 ARM_PREFETCH_NOT_BENEFICIAL,
2195 tune_params::PREF_CONST_POOL_FALSE,
2196 tune_params::PREF_LDRD_TRUE,
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2198 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2199 tune_params::DISPARAGE_FLAGS_ALL,
2200 tune_params::PREF_NEON_64_FALSE,
2201 tune_params::PREF_NEON_STRINGOPS_TRUE,
2202 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2203 tune_params::SCHED_AUTOPREF_FULL
2204 };
2205
2206 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2207 cycle to execute each. An LDR from the constant pool also takes two cycles
2208 to execute, but mildly increases pipelining opportunity (consecutive
2209 loads/stores can be pipelined together, saving one cycle), and may also
2210 improve icache utilisation. Hence we prefer the constant pool for such
2211 processors. */
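/* As an illustrative sketch of the trade-off above, a 32-bit constant can
   be materialised either as

       movw    r0, #:lower16:sym    @ one cycle
       movt    r0, #:upper16:sym    @ one cycle

   or with a single literal-pool load

       ldr     r0, .Lpool           @ two cycles, but consecutive loads
                                    @ can be pipelined, saving a cycle

   With the cycle counts quoted above the two sequences cost the same in
   isolation, so the pipelining and icache effects decide the preference. */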
2212
2213 const struct tune_params arm_v7m_tune =
2214 {
2215 &v7m_extra_costs,
2216 NULL, /* Sched adj cost. */
2217 arm_cortex_m_branch_cost,
2218 &arm_default_vec_cost,
2219 1, /* Constant limit. */
2220 2, /* Max cond insns. */
2221 8, /* Memset max inline. */
2222 1, /* Issue rate. */
2223 ARM_PREFETCH_NOT_BENEFICIAL,
2224 tune_params::PREF_CONST_POOL_TRUE,
2225 tune_params::PREF_LDRD_FALSE,
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2228 tune_params::DISPARAGE_FLAGS_NEITHER,
2229 tune_params::PREF_NEON_64_FALSE,
2230 tune_params::PREF_NEON_STRINGOPS_FALSE,
2231 tune_params::FUSE_NOTHING,
2232 tune_params::SCHED_AUTOPREF_OFF
2233 };
2234
2235 /* Cortex-M7 tuning. */
2236
2237 const struct tune_params arm_cortex_m7_tune =
2238 {
2239 &v7m_extra_costs,
2240 NULL, /* Sched adj cost. */
2241 arm_cortex_m7_branch_cost,
2242 &arm_default_vec_cost,
2243 0, /* Constant limit. */
2244 1, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_TRUE,
2249 tune_params::PREF_LDRD_FALSE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_NEITHER,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_FALSE,
2255 tune_params::FUSE_NOTHING,
2256 tune_params::SCHED_AUTOPREF_OFF
2257 };
2258
2259 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2260 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2261 cortex-m23. */
2262 const struct tune_params arm_v6m_tune =
2263 {
2264 &generic_extra_costs, /* Insn extra costs. */
2265 NULL, /* Sched adj cost. */
2266 arm_default_branch_cost,
2267 &arm_default_vec_cost, /* Vectorizer costs. */
2268 1, /* Constant limit. */
2269 5, /* Max cond insns. */
2270 8, /* Memset max inline. */
2271 1, /* Issue rate. */
2272 ARM_PREFETCH_NOT_BENEFICIAL,
2273 tune_params::PREF_CONST_POOL_FALSE,
2274 tune_params::PREF_LDRD_FALSE,
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2276 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2277 tune_params::DISPARAGE_FLAGS_NEITHER,
2278 tune_params::PREF_NEON_64_FALSE,
2279 tune_params::PREF_NEON_STRINGOPS_FALSE,
2280 tune_params::FUSE_NOTHING,
2281 tune_params::SCHED_AUTOPREF_OFF
2282 };
2283
2284 const struct tune_params arm_fa726te_tune =
2285 {
2286 &generic_extra_costs, /* Insn extra costs. */
2287 fa726te_sched_adjust_cost,
2288 arm_default_branch_cost,
2289 &arm_default_vec_cost,
2290 1, /* Constant limit. */
2291 5, /* Max cond insns. */
2292 8, /* Memset max inline. */
2293 2, /* Issue rate. */
2294 ARM_PREFETCH_NOT_BENEFICIAL,
2295 tune_params::PREF_CONST_POOL_TRUE,
2296 tune_params::PREF_LDRD_FALSE,
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2298 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2299 tune_params::DISPARAGE_FLAGS_NEITHER,
2300 tune_params::PREF_NEON_64_FALSE,
2301 tune_params::PREF_NEON_STRINGOPS_FALSE,
2302 tune_params::FUSE_NOTHING,
2303 tune_params::SCHED_AUTOPREF_OFF
2304 };
2305
2306 /* Auto-generated CPU, FPU and architecture tables. */
2307 #include "arm-cpu-data.h"
2308
2309 /* The name of the preprocessor macro to define for this architecture. PROFILE
2310 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2311 is thus chosen to be big enough to hold the longest architecture name. */
2312
2313 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2314
2315 /* Supported TLS relocations. */
2316
2317 enum tls_reloc {
2318 TLS_GD32,
2319 TLS_LDM32,
2320 TLS_LDO32,
2321 TLS_IE32,
2322 TLS_LE32,
2323 TLS_DESCSEQ /* GNU scheme */
2324 };
2325
2326 /* The maximum number of insns to be used when loading a constant. */
2327 inline static int
2328 arm_constant_limit (bool size_p)
2329 {
2330 return size_p ? 1 : current_tune->constant_limit;
2331 }
2332
2333 /* Emit an insn that's a simple single-set. Both the operands must be known
2334 to be valid. */
2335 inline static rtx_insn *
2336 emit_set_insn (rtx x, rtx y)
2337 {
2338 return emit_insn (gen_rtx_SET (x, y));
2339 }
2340
2341 /* Return the number of bits set in VALUE. */
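/* For example, bit_count (0x2c) loops 0x2c -> 0x28 -> 0x20 -> 0 and returns
   3, since each `value &= value - 1' below clears exactly one set bit. */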
2342 static unsigned
2343 bit_count (unsigned long value)
2344 {
2345 unsigned long count = 0;
2346
2347 while (value)
2348 {
2349 count++;
2350 value &= value - 1; /* Clear the least-significant set bit. */
2351 }
2352
2353 return count;
2354 }
2355
2356 /* Return the number of bits set in BMAP. */
2357 static unsigned
2358 bitmap_popcount (const sbitmap bmap)
2359 {
2360 unsigned int count = 0;
2361 unsigned int n = 0;
2362 sbitmap_iterator sbi;
2363
2364 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2365 count++;
2366 return count;
2367 }
2368
2369 typedef struct
2370 {
2371 machine_mode mode;
2372 const char *name;
2373 } arm_fixed_mode_set;
2374
2375 /* A small helper for setting fixed-point library libfuncs. */
2376
2377 static void
2378 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2379 const char *funcname, const char *modename,
2380 int num_suffix)
2381 {
2382 char buffer[50];
2383
2384 if (num_suffix == 0)
2385 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2386 else
2387 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2388
2389 set_optab_libfunc (optable, mode, buffer);
2390 }
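/* For instance, the loop in arm_init_libfuncs below ends up calling
   arm_set_fixed_optab_libfunc (ssadd_optab, SQmode, "ssadd", "sq", 3),
   which registers the helper name "__gnu_ssaddsq3" built by the sprintf
   above. */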
2391
2392 static void
2393 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2394 machine_mode from, const char *funcname,
2395 const char *toname, const char *fromname)
2396 {
2397 char buffer[50];
2398 const char *maybe_suffix_2 = "";
2399
2400 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2401 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2402 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2403 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2404 maybe_suffix_2 = "2";
2405
2406 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2407 maybe_suffix_2);
2408
2409 set_conv_libfunc (optable, to, from, buffer);
2410 }
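/* For instance, a conversion from HQmode to SQmode (both signed fract
   modes, so the "2" suffix applies) is registered as "__gnu_fracthqsq2",
   i.e. "__gnu_" + "fract" + from-name + to-name + "2". */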
2411
2412 /* Set up library functions unique to ARM. */
2413
2414 static void
2415 arm_init_libfuncs (void)
2416 {
2417 /* For Linux, we have access to kernel support for atomic operations. */
2418 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2419 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2420
2421 /* There are no special library functions unless we are using the
2422 ARM BPABI. */
2423 if (!TARGET_BPABI)
2424 return;
2425
2426 /* The functions below are described in Section 4 of the "Run-Time
2427 ABI for the ARM architecture", Version 1.0. */
2428
2429 /* Double-precision floating-point arithmetic. Table 2. */
2430 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2431 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2432 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2433 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2434 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2435
2436 /* Double-precision comparisons. Table 3. */
2437 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2438 set_optab_libfunc (ne_optab, DFmode, NULL);
2439 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2440 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2441 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2442 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2443 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2444
2445 /* Single-precision floating-point arithmetic. Table 4. */
2446 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2447 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2448 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2449 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2450 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2451
2452 /* Single-precision comparisons. Table 5. */
2453 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2454 set_optab_libfunc (ne_optab, SFmode, NULL);
2455 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2456 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2457 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2458 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2459 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2460
2461 /* Floating-point to integer conversions. Table 6. */
2462 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2463 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2464 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2465 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2466 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2467 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2468 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2469 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2470
2471 /* Conversions between floating types. Table 7. */
2472 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2473 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2474
2475 /* Integer to floating-point conversions. Table 8. */
2476 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2477 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2478 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2479 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2480 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2481 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2482 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2483 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2484
2485 /* Long long. Table 9. */
2486 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2487 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2488 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2489 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2490 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2491 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2492 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2493 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2494
2495 /* Integer (32/32->32) division. \S 4.3.1. */
2496 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2497 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2498
2499 /* The divmod functions are designed so that they can be used for
2500 plain division, even though they return both the quotient and the
2501 remainder. The quotient is returned in the usual location (i.e.,
2502 r0 for SImode, {r0, r1} for DImode), just as would be expected
2503 for an ordinary division routine. Because the AAPCS calling
2504 conventions specify that all of { r0, r1, r2, r3 } are
2505 call-clobbered registers, there is no need to tell the compiler
2506 explicitly that those registers are clobbered by these
2507 routines. */
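/* For example, "__aeabi_idivmod" returns the quotient in r0 and the
   remainder in r1, so code interested only in the quotient can simply
   ignore r1. */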
2508 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2509 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2510
2511 /* For SImode division the ABI provides div-without-mod routines,
2512 which are faster. */
2513 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2514 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2515
2516 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2517 divmod libcalls instead. */
2518 set_optab_libfunc (smod_optab, DImode, NULL);
2519 set_optab_libfunc (umod_optab, DImode, NULL);
2520 set_optab_libfunc (smod_optab, SImode, NULL);
2521 set_optab_libfunc (umod_optab, SImode, NULL);
2522
2523 /* Half-precision float operations. The compiler handles all operations
2524 with NULL libfuncs by converting to SFmode. */
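/* For example, with a NULL add_optab libfunc an HFmode addition is carried
   out (in the soft-float case) by widening both operands via __gnu_h2f_*
   (registered below), adding in SFmode, and truncating back with
   __gnu_f2h_* where an HFmode result is required -- a sketch of the
   promotion path described above. */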
2525 switch (arm_fp16_format)
2526 {
2527 case ARM_FP16_FORMAT_IEEE:
2528 case ARM_FP16_FORMAT_ALTERNATIVE:
2529
2530 /* Conversions. */
2531 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2532 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2533 ? "__gnu_f2h_ieee"
2534 : "__gnu_f2h_alternative"));
2535 set_conv_libfunc (sext_optab, SFmode, HFmode,
2536 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2537 ? "__gnu_h2f_ieee"
2538 : "__gnu_h2f_alternative"));
2539
2540 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2541 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2542 ? "__gnu_d2h_ieee"
2543 : "__gnu_d2h_alternative"));
2544
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, HFmode, NULL);
2547 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2548 set_optab_libfunc (smul_optab, HFmode, NULL);
2549 set_optab_libfunc (neg_optab, HFmode, NULL);
2550 set_optab_libfunc (sub_optab, HFmode, NULL);
2551
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, HFmode, NULL);
2554 set_optab_libfunc (ne_optab, HFmode, NULL);
2555 set_optab_libfunc (lt_optab, HFmode, NULL);
2556 set_optab_libfunc (le_optab, HFmode, NULL);
2557 set_optab_libfunc (ge_optab, HFmode, NULL);
2558 set_optab_libfunc (gt_optab, HFmode, NULL);
2559 set_optab_libfunc (unord_optab, HFmode, NULL);
2560 break;
2561
2562 default:
2563 break;
2564 }
2565
2566 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2567 {
2568 const arm_fixed_mode_set fixed_arith_modes[] =
2569 {
2570 { QQmode, "qq" },
2571 { UQQmode, "uqq" },
2572 { HQmode, "hq" },
2573 { UHQmode, "uhq" },
2574 { SQmode, "sq" },
2575 { USQmode, "usq" },
2576 { DQmode, "dq" },
2577 { UDQmode, "udq" },
2578 { TQmode, "tq" },
2579 { UTQmode, "utq" },
2580 { HAmode, "ha" },
2581 { UHAmode, "uha" },
2582 { SAmode, "sa" },
2583 { USAmode, "usa" },
2584 { DAmode, "da" },
2585 { UDAmode, "uda" },
2586 { TAmode, "ta" },
2587 { UTAmode, "uta" }
2588 };
2589 const arm_fixed_mode_set fixed_conv_modes[] =
2590 {
2591 { QQmode, "qq" },
2592 { UQQmode, "uqq" },
2593 { HQmode, "hq" },
2594 { UHQmode, "uhq" },
2595 { SQmode, "sq" },
2596 { USQmode, "usq" },
2597 { DQmode, "dq" },
2598 { UDQmode, "udq" },
2599 { TQmode, "tq" },
2600 { UTQmode, "utq" },
2601 { HAmode, "ha" },
2602 { UHAmode, "uha" },
2603 { SAmode, "sa" },
2604 { USAmode, "usa" },
2605 { DAmode, "da" },
2606 { UDAmode, "uda" },
2607 { TAmode, "ta" },
2608 { UTAmode, "uta" },
2609 { QImode, "qi" },
2610 { HImode, "hi" },
2611 { SImode, "si" },
2612 { DImode, "di" },
2613 { TImode, "ti" },
2614 { SFmode, "sf" },
2615 { DFmode, "df" }
2616 };
2617 unsigned int i, j;
2618
2619 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2620 {
2621 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2622 "add", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2624 "ssadd", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2626 "usadd", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2628 "sub", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2630 "sssub", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2632 "ussub", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2634 "mul", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2636 "ssmul", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2638 "usmul", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2640 "div", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2642 "udiv", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2644 "ssdiv", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2646 "usdiv", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2648 "neg", fixed_arith_modes[i].name, 2);
2649 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2650 "ssneg", fixed_arith_modes[i].name, 2);
2651 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2652 "usneg", fixed_arith_modes[i].name, 2);
2653 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2654 "ashl", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2656 "ashr", fixed_arith_modes[i].name, 3);
2657 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2658 "lshr", fixed_arith_modes[i].name, 3);
2659 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2660 "ssashl", fixed_arith_modes[i].name, 3);
2661 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2662 "usashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2664 "cmp", fixed_arith_modes[i].name, 2);
2665 }
2666
2667 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2668 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2669 {
2670 if (i == j
2671 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2672 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2673 continue;
2674
2675 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2676 fixed_conv_modes[j].mode, "fract",
2677 fixed_conv_modes[i].name,
2678 fixed_conv_modes[j].name);
2679 arm_set_fixed_conv_libfunc (satfract_optab,
2680 fixed_conv_modes[i].mode,
2681 fixed_conv_modes[j].mode, "satfract",
2682 fixed_conv_modes[i].name,
2683 fixed_conv_modes[j].name);
2684 arm_set_fixed_conv_libfunc (fractuns_optab,
2685 fixed_conv_modes[i].mode,
2686 fixed_conv_modes[j].mode, "fractuns",
2687 fixed_conv_modes[i].name,
2688 fixed_conv_modes[j].name);
2689 arm_set_fixed_conv_libfunc (satfractuns_optab,
2690 fixed_conv_modes[i].mode,
2691 fixed_conv_modes[j].mode, "satfractuns",
2692 fixed_conv_modes[i].name,
2693 fixed_conv_modes[j].name);
2694 }
2695 }
2696
2697 if (TARGET_AAPCS_BASED)
2698 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2699 }
2700
2701 /* On AAPCS systems, this is the "struct __va_list". */
2702 static GTY(()) tree va_list_type;
2703
2704 /* Return the type to use as __builtin_va_list. */
2705 static tree
2706 arm_build_builtin_va_list (void)
2707 {
2708 tree va_list_name;
2709 tree ap_field;
2710
2711 if (!TARGET_AAPCS_BASED)
2712 return std_build_builtin_va_list ();
2713
2714 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2715 defined as:
2716
2717 struct __va_list
2718 {
2719 void *__ap;
2720 };
2721
2722 The C Library ABI further reinforces this definition in \S
2723 4.1.
2724
2725 We must follow this definition exactly. The structure tag
2726 name is visible in C++ mangled names, and thus forms a part
2727 of the ABI. The field name may be used by people who
2728 #include <stdarg.h>. */
2729 /* Create the type. */
2730 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2731 /* Give it the required name. */
2732 va_list_name = build_decl (BUILTINS_LOCATION,
2733 TYPE_DECL,
2734 get_identifier ("__va_list"),
2735 va_list_type);
2736 DECL_ARTIFICIAL (va_list_name) = 1;
2737 TYPE_NAME (va_list_type) = va_list_name;
2738 TYPE_STUB_DECL (va_list_type) = va_list_name;
2739 /* Create the __ap field. */
2740 ap_field = build_decl (BUILTINS_LOCATION,
2741 FIELD_DECL,
2742 get_identifier ("__ap"),
2743 ptr_type_node);
2744 DECL_ARTIFICIAL (ap_field) = 1;
2745 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2746 TYPE_FIELDS (va_list_type) = ap_field;
2747 /* Compute its layout. */
2748 layout_type (va_list_type);
2749
2750 return va_list_type;
2751 }
2752
2753 /* Return an expression of type "void *" pointing to the next
2754 available argument in a variable-argument list. VALIST is the
2755 user-level va_list object, of type __builtin_va_list. */
2756 static tree
2757 arm_extract_valist_ptr (tree valist)
2758 {
2759 if (TREE_TYPE (valist) == error_mark_node)
2760 return error_mark_node;
2761
2762 /* On an AAPCS target, the pointer is stored within "struct
2763 va_list". */
2764 if (TARGET_AAPCS_BASED)
2765 {
2766 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2767 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2768 valist, ap_field, NULL_TREE);
2769 }
2770
2771 return valist;
2772 }
2773
2774 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2775 static void
2776 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2777 {
2778 valist = arm_extract_valist_ptr (valist);
2779 std_expand_builtin_va_start (valist, nextarg);
2780 }
2781
2782 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2783 static tree
2784 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2785 gimple_seq *post_p)
2786 {
2787 valist = arm_extract_valist_ptr (valist);
2788 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2789 }
2790
2791 /* Check any incompatible options that the user has specified. */
2792 static void
2793 arm_option_check_internal (struct gcc_options *opts)
2794 {
2795 int flags = opts->x_target_flags;
2796
2797 /* iWMMXt and NEON are incompatible. */
2798 if (TARGET_IWMMXT
2799 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2800 error ("iWMMXt and NEON are incompatible");
2801
2802 /* Make sure that the processor choice does not conflict with any of the
2803 other command line choices. */
2804 if (TARGET_ARM_P (flags)
2805 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2806 error ("target CPU does not support ARM mode");
2807
2808 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2809 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2810 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2811
2812 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2813 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2814
2815 /* If this target is normally configured to use APCS frames, warn if they
2816 are turned off and debugging is turned on. */
2817 if (TARGET_ARM_P (flags)
2818 && write_symbols != NO_DEBUG
2819 && !TARGET_APCS_FRAME
2820 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2821 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2822
2823 /* iWMMXt unsupported under Thumb mode. */
2824 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2825 error ("iWMMXt unsupported under Thumb mode");
2826
2827 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2828 error ("can not use -mtp=cp15 with 16-bit Thumb");
2829
2830 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2831 {
2832 error ("RTP PIC is incompatible with Thumb");
2833 flag_pic = 0;
2834 }
2835
2836 /* We only support -mslow-flash-data on armv7-m targets. */
2837 if (target_slow_flash_data
2838 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2839 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2840 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2841
2842 /* We only support pure-code on Thumb-2 M-profile targets. */
2843 if (target_pure_code
2844 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2845 error ("-mpure-code only supports non-pic code on armv7-m targets");
2846
2847 }
2848
2849 /* Recompute the global settings depending on target attribute options. */
2850
2851 static void
2852 arm_option_params_internal (void)
2853 {
2854 /* If we are not using the default (ARM mode) section anchor offset
2855 ranges, then set the correct ranges now. */
2856 if (TARGET_THUMB1)
2857 {
2858 /* Thumb-1 LDR instructions cannot have negative offsets.
2859 Permissible positive offset ranges are 5-bit (for byte loads),
2860 6-bit (for halfword loads), or 7-bit (for word loads).
2861 Empirical results suggest a 7-bit anchor range gives the best
2862 overall code size. */
2863 targetm.min_anchor_offset = 0;
2864 targetm.max_anchor_offset = 127;
2865 }
2866 else if (TARGET_THUMB2)
2867 {
2868 /* The minimum is set such that the total size of the block
2869 for a particular anchor is 248 + 1 + 4095 bytes, which is
2870 divisible by eight, ensuring natural spacing of anchors. */
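/* A quick check of the arithmetic above: 248 + 1 + 4095 = 4344 = 8 * 543. */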
2871 targetm.min_anchor_offset = -248;
2872 targetm.max_anchor_offset = 4095;
2873 }
2874 else
2875 {
2876 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2877 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2878 }
2879
2880 if (optimize_size)
2881 {
2882 /* If optimizing for size, bump the number of instructions that we
2883 are prepared to conditionally execute (even on a StrongARM). */
2884 max_insns_skipped = 6;
2885
2886 /* For THUMB2, we limit the conditional sequence to one IT block. */
2887 if (TARGET_THUMB2)
2888 max_insns_skipped = arm_restrict_it ? 1 : 4;
2889 }
2890 else
2891 /* When -mrestrict-it is in use, tone down the if-conversion. */
2892 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2893 ? 1 : current_tune->max_insns_skipped;
2894 }
2895
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2899
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2902
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2905 {
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
2909 }
2910
2911 /* Implement targetm.override_options_after_change. */
2912
2913 static void
2914 arm_override_options_after_change (void)
2915 {
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2919
2920 arm_override_options_after_change_1 (&global_options);
2921 }
2922
2923 static void
2924 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2925 {
2926 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2927 false);
2928 }
2929
2930 /* Reset options between modes that the user has specified. */
2931 static void
2932 arm_option_override_internal (struct gcc_options *opts,
2933 struct gcc_options *opts_set)
2934 {
2935 arm_override_options_after_change_1 (opts);
2936
2937 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2938 {
2939 /* The default is to enable interworking, so this warning message would
2940 be confusing to users who have just compiled with, e.g., -march=armv3. */
2941 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2942 opts->x_target_flags &= ~MASK_INTERWORK;
2943 }
2944
2945 if (TARGET_THUMB_P (opts->x_target_flags)
2946 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2947 {
2948 warning (0, "target CPU does not support THUMB instructions");
2949 opts->x_target_flags &= ~MASK_THUMB;
2950 }
2951
2952 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2953 {
2954 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2955 opts->x_target_flags &= ~MASK_APCS_FRAME;
2956 }
2957
2958 /* Callee super interworking implies thumb interworking. Adding
2959 this to the flags here simplifies the logic elsewhere. */
2960 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2961 opts->x_target_flags |= MASK_INTERWORK;
2962
2963 /* We need to remember initial values so combinations of options like
2964 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2965 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2966
2967 if (! opts_set->x_arm_restrict_it)
2968 opts->x_arm_restrict_it = arm_arch8;
2969
2970 /* ARM execution state and M profile don't have [restrict] IT. */
2971 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2972 opts->x_arm_restrict_it = 0;
2973
2974 /* Enable -munaligned-access by default for
2975 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2976 i.e. Thumb2 and ARM state only.
2977 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2978 - ARMv8 architecture-based processors.
2979
2980 Disable -munaligned-access by default for
2981 - all pre-ARMv6 architecture-based processors
2982 - ARMv6-M architecture-based processors
2983 - ARMv8-M Baseline processors. */
2984
2985 if (! opts_set->x_unaligned_access)
2986 {
2987 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2988 && arm_arch6 && (arm_arch_notm || arm_arch7));
2989 }
2990 else if (opts->x_unaligned_access == 1
2991 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2992 {
2993 warning (0, "target CPU does not support unaligned accesses");
2994 opts->x_unaligned_access = 0;
2995 }
2996
2997 /* Don't warn since it's on by default in -O2. */
2998 if (TARGET_THUMB1_P (opts->x_target_flags))
2999 opts->x_flag_schedule_insns = 0;
3000 else
3001 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3002
3003 /* Disable shrink-wrap when optimizing function for size, since it tends to
3004 generate additional returns. */
3005 if (optimize_function_for_size_p (cfun)
3006 && TARGET_THUMB2_P (opts->x_target_flags))
3007 opts->x_flag_shrink_wrap = false;
3008 else
3009 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3010
3011 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3012 - epilogue_insns - does not accurately model the corresponding insns
3013 emitted in the asm file. In particular, see the comment in thumb_exit
3014 'Find out how many of the (return) argument registers we can corrupt'.
3015 As a consequence, the epilogue may clobber registers without fipa-ra
3016 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3017 TODO: Accurately model clobbers for epilogue_insns and reenable
3018 fipa-ra. */
3019 if (TARGET_THUMB1_P (opts->x_target_flags))
3020 opts->x_flag_ipa_ra = 0;
3021 else
3022 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3023
3024 /* Thumb2 inline assembly code should always use unified syntax.
3025 This will apply to ARM and Thumb1 eventually. */
3026 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3027
3028 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3029 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3030 #endif
3031 }
3032
3033 /* Convert a static initializer array of feature bits to sbitmap
3034 representation. */
3035 static void
3036 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3037 {
3038 bitmap_clear (isa);
3039 while (*isa_bits != isa_nobit)
3040 bitmap_set_bit (isa, *(isa_bits++));
3041 }
3042
3043 static sbitmap isa_all_fpubits;
3044 static sbitmap isa_quirkbits;
3045
3046 /* Configure a build target TARGET from the user-specified options OPTS and
3047 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3048 architecture have been specified, but the two are not identical. */
3049 void
3050 arm_configure_build_target (struct arm_build_target *target,
3051 struct cl_target_option *opts,
3052 struct gcc_options *opts_set,
3053 bool warn_compatible)
3054 {
3055 const struct processors *arm_selected_tune = NULL;
3056 const struct processors *arm_selected_arch = NULL;
3057 const struct processors *arm_selected_cpu = NULL;
3058 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3059
3060 bitmap_clear (target->isa);
3061 target->core_name = NULL;
3062 target->arch_name = NULL;
3063
3064 if (opts_set->x_arm_arch_option)
3065 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3066
3067 if (opts_set->x_arm_cpu_option)
3068 {
3069 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3070 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3071 }
3072
3073 if (opts_set->x_arm_tune_option)
3074 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3075
3076 if (arm_selected_arch)
3077 {
3078 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3079
3080 if (arm_selected_cpu)
3081 {
3082 auto_sbitmap cpu_isa (isa_num_bits);
3083
3084 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3085 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3086 /* Ignore any bits that are quirk bits. */
3087 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3088 /* Ignore (for now) any bits that might be set by -mfpu. */
3089 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3090
3091 if (!bitmap_empty_p (cpu_isa))
3092 {
3093 if (warn_compatible)
3094 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3095 arm_selected_cpu->name, arm_selected_arch->name);
3096 /* -march wins for code generation.
3097 -mcpu wins for default tuning. */
3098 if (!arm_selected_tune)
3099 arm_selected_tune = arm_selected_cpu;
3100
3101 arm_selected_cpu = arm_selected_arch;
3102 target->arch_name = arm_selected_arch->name;
3103 }
3104 else
3105 {
3106 /* Architecture and CPU are essentially the same.
3107 Prefer the CPU setting. */
3108 arm_selected_arch = NULL;
3109 target->core_name = arm_selected_cpu->name;
3110 }
3111 }
3112 else
3113 {
3114 /* Pick a CPU based on the architecture. */
3115 arm_selected_cpu = arm_selected_arch;
3116 target->arch_name = arm_selected_arch->name;
3117 /* Note: target->core_name is left unset in this path. */
3118 }
3119 }
3120 else if (arm_selected_cpu)
3121 {
3122 target->core_name = arm_selected_cpu->name;
3123 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3124 }
3125 /* If the user did not specify a processor, choose one for them. */
3126 else
3127 {
3128 const struct processors * sel;
3129 auto_sbitmap sought_isa (isa_num_bits);
3130 bitmap_clear (sought_isa);
3131 auto_sbitmap default_isa (isa_num_bits);
3132
3133 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3134 gcc_assert (arm_selected_cpu->name);
3135
3136 /* RWE: All of the selection logic below (to the end of this
3137 'if' clause) looks somewhat suspect. It appears to be mostly
3138 there to support forcing thumb support when the default CPU
3139 does not have thumb (somewhat dubious in terms of what the
3140 user might be expecting). I think it should be removed once
3141 support for the pre-thumb era cores is removed. */
3142 sel = arm_selected_cpu;
3143 arm_initialize_isa (default_isa, sel->isa_bits);
3144
3145 /* Now check to see if the user has specified any command line
3146 switches that require certain abilities from the cpu. */
3147
3148 if (TARGET_INTERWORK || TARGET_THUMB)
3149 {
3150 bitmap_set_bit (sought_isa, isa_bit_thumb);
3151 bitmap_set_bit (sought_isa, isa_bit_mode32);
3152
3153 /* There are no ARM processors that support both APCS-26 and
3154 interworking. Therefore we forcibly remove MODE26 from
3155 the isa features here (if it was set), so that the
3156 search below will always be able to find a compatible
3157 processor. */
3158 bitmap_clear_bit (default_isa, isa_bit_mode26);
3159 }
3160
3161 /* If there are such requirements and the default CPU does not
3162 satisfy them, we need to run over the complete list of
3163 cores looking for one that is satisfactory. */
3164 if (!bitmap_empty_p (sought_isa)
3165 && !bitmap_subset_p (sought_isa, default_isa))
3166 {
3167 auto_sbitmap candidate_isa (isa_num_bits);
3168 /* We're only interested in a CPU with at least the
3169 capabilities of the default CPU and the required
3170 additional features. */
3171 bitmap_ior (default_isa, default_isa, sought_isa);
3172
3173 /* Try to locate a CPU type that supports all of the abilities
3174 of the default CPU, plus the extra abilities requested by
3175 the user. */
3176 for (sel = all_cores; sel->name != NULL; sel++)
3177 {
3178 arm_initialize_isa (candidate_isa, sel->isa_bits);
3179 /* An exact match? */
3180 if (bitmap_equal_p (default_isa, candidate_isa))
3181 break;
3182 }
3183
3184 if (sel->name == NULL)
3185 {
3186 unsigned current_bit_count = isa_num_bits;
3187 const struct processors * best_fit = NULL;
3188
3189 /* Ideally we would like to issue an error message here
3190 saying that it was not possible to find a CPU compatible
3191 with the default CPU, but which also supports the command
3192 line options specified by the programmer, and so they
3193 ought to use the -mcpu=<name> command line option to
3194 override the default CPU type.
3195
3196 If we cannot find a CPU that has exactly the
3197 characteristics of the default CPU and the given
3198 command line options we scan the array again looking
3199 for a best match. The best match must have at least
3200 the capabilities of the perfect match. */
3201 for (sel = all_cores; sel->name != NULL; sel++)
3202 {
3203 arm_initialize_isa (candidate_isa, sel->isa_bits);
3204
3205 if (bitmap_subset_p (default_isa, candidate_isa))
3206 {
3207 unsigned count;
3208
3209 bitmap_and_compl (candidate_isa, candidate_isa,
3210 default_isa);
3211 count = bitmap_popcount (candidate_isa);
3212
3213 if (count < current_bit_count)
3214 {
3215 best_fit = sel;
3216 current_bit_count = count;
3217 }
3218 }
3219
3220 gcc_assert (best_fit);
3221 sel = best_fit;
3222 }
3223 }
3224 arm_selected_cpu = sel;
3225 }
3226
3227 /* Now we know the CPU, we can finally initialize the target
3228 structure. */
3229 target->core_name = arm_selected_cpu->name;
3230 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3231 }
3232
3233 gcc_assert (arm_selected_cpu);
3234
3235 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3236 {
3237 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3238 auto_sbitmap fpu_bits (isa_num_bits);
3239
3240 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3241 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3242 bitmap_ior (target->isa, target->isa, fpu_bits);
3243 }
3244 else if (target->core_name == NULL)
3245 /* To support this we need to be able to parse FPU feature options
3246 from the architecture string. */
3247 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3248
3249 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3250 if (!arm_selected_tune)
3251 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3252
3253 /* Finish initializing the target structure. */
3254 target->arch_pp_name = arm_selected_cpu->arch;
3255 target->base_arch = arm_selected_cpu->base_arch;
3256 target->arch_core = arm_selected_cpu->core;
3257
3258 target->tune_flags = arm_selected_tune->tune_flags;
3259 target->tune = arm_selected_tune->tune;
3260 target->tune_core = arm_selected_tune->core;
3261 }
3262
3263 /* Fix up any incompatible options that the user has specified. */
3264 static void
3265 arm_option_override (void)
3266 {
3267 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
 3268   static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
3269 cl_target_option opts;
3270
3271 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3272 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3273
3274 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3275 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3276
3277 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3278
3279 if (!global_options_set.x_arm_fpu_index)
3280 {
3281 const char *target_fpu_name;
3282 bool ok;
3283 int fpu_index;
3284
3285 #ifdef FPUTYPE_DEFAULT
3286 target_fpu_name = FPUTYPE_DEFAULT;
3287 #else
3288 target_fpu_name = "vfp";
3289 #endif
3290
3291 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3292 CL_TARGET);
3293 gcc_assert (ok);
3294 arm_fpu_index = (enum fpu_type) fpu_index;
3295 }
3296
3297 cl_target_option_save (&opts, &global_options);
3298 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3299 true);
3300
3301 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3302 SUBTARGET_OVERRIDE_OPTIONS;
3303 #endif
3304
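  /* For example, an armv7-a target ends up with arch_pp_name "7A" here,
     giving the familiar __ARM_ARCH_7A__ preprocessor macro.  */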
3305 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3306 arm_base_arch = arm_active_target.base_arch;
3307
3308 arm_tune = arm_active_target.tune_core;
3309 tune_flags = arm_active_target.tune_flags;
3310 current_tune = arm_active_target.tune;
3311
3312 /* TBD: Dwarf info for apcs frame is not handled yet. */
3313 if (TARGET_APCS_FRAME)
3314 flag_shrink_wrap = false;
3315
3316 /* BPABI targets use linker tricks to allow interworking on cores
3317 without thumb support. */
3318 if (TARGET_INTERWORK
3319 && !TARGET_BPABI
3320 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3321 {
 3322       warning (0, "target CPU does not support interworking");
3323 target_flags &= ~MASK_INTERWORK;
3324 }
3325
3326 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3327 {
3328 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3329 target_flags |= MASK_APCS_FRAME;
3330 }
3331
3332 if (TARGET_POKE_FUNCTION_NAME)
3333 target_flags |= MASK_APCS_FRAME;
3334
3335 if (TARGET_APCS_REENT && flag_pic)
3336 error ("-fpic and -mapcs-reent are incompatible");
3337
3338 if (TARGET_APCS_REENT)
3339 warning (0, "APCS reentrant code not supported. Ignored");
3340
3341 /* Initialize boolean versions of the architectural flags, for use
3342 in the arm.md file. */
3343 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3344 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3345 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3346 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3347 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3348 arm_arch5te = arm_arch5e
3349 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3350 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3351 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3352 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3353 arm_arch6m = arm_arch6 && !arm_arch_notm;
3354 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3355 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3356 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3357 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3358 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3359 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3360 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3361 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3362 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3363 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3364 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3365 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3366 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3367 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3368 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3369 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3370 if (arm_fp16_inst)
3371 {
3372 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3373 error ("selected fp16 options are incompatible");
3374 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3375 }
3376
3377
3378 /* Set up some tuning parameters. */
3379 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3380 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3381 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3382 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3383 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3384 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3385
3386 /* And finally, set up some quirks. */
3387 arm_arch_no_volatile_ce
3388 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3389 arm_arch6kz
3390 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3391
3392 /* V5 code we generate is completely interworking capable, so we turn off
3393 TARGET_INTERWORK here to avoid many tests later on. */
3394
3395 /* XXX However, we must pass the right pre-processor defines to CPP
3396 or GLD can get confused. This is a hack. */
3397 if (TARGET_INTERWORK)
3398 arm_cpp_interwork = 1;
3399
3400 if (arm_arch5)
3401 target_flags &= ~MASK_INTERWORK;
3402
3403 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3404 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3405
3406 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3407 error ("iwmmxt abi requires an iwmmxt capable cpu");
3408
3409 /* If soft-float is specified then don't use FPU. */
3410 if (TARGET_SOFT_FLOAT)
3411 arm_fpu_attr = FPU_NONE;
3412 else
3413 arm_fpu_attr = FPU_VFP;
3414
3415 if (TARGET_AAPCS_BASED)
3416 {
3417 if (TARGET_CALLER_INTERWORKING)
3418 error ("AAPCS does not support -mcaller-super-interworking");
3419 else
3420 if (TARGET_CALLEE_INTERWORKING)
3421 error ("AAPCS does not support -mcallee-super-interworking");
3422 }
3423
3424 /* __fp16 support currently assumes the core has ldrh. */
3425 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3426 sorry ("__fp16 and no ldrh");
3427
3428 if (TARGET_AAPCS_BASED)
3429 {
3430 if (arm_abi == ARM_ABI_IWMMXT)
3431 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3432 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3433 && TARGET_HARD_FLOAT)
3434 {
3435 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3436 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3437 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3438 }
3439 else
3440 arm_pcs_default = ARM_PCS_AAPCS;
3441 }
3442 else
3443 {
3444 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3445 sorry ("-mfloat-abi=hard and VFP");
3446
3447 if (arm_abi == ARM_ABI_APCS)
3448 arm_pcs_default = ARM_PCS_APCS;
3449 else
3450 arm_pcs_default = ARM_PCS_ATPCS;
3451 }
3452
3453 /* For arm2/3 there is no need to do any scheduling if we are doing
3454 software floating-point. */
3455 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3456 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3457
3458 /* Use the cp15 method if it is available. */
3459 if (target_thread_pointer == TP_AUTO)
3460 {
3461 if (arm_arch6k && !TARGET_THUMB1)
3462 target_thread_pointer = TP_CP15;
3463 else
3464 target_thread_pointer = TP_SOFT;
3465 }
3466
3467 /* Override the default structure alignment for AAPCS ABI. */
3468 if (!global_options_set.x_arm_structure_size_boundary)
3469 {
3470 if (TARGET_AAPCS_BASED)
3471 arm_structure_size_boundary = 8;
3472 }
3473 else
3474 {
3475 if (arm_structure_size_boundary != 8
3476 && arm_structure_size_boundary != 32
3477 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3478 {
3479 if (ARM_DOUBLEWORD_ALIGN)
3480 warning (0,
3481 "structure size boundary can only be set to 8, 32 or 64");
3482 else
3483 warning (0, "structure size boundary can only be set to 8 or 32");
3484 arm_structure_size_boundary
3485 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3486 }
3487 }
3488
3489 if (TARGET_VXWORKS_RTP)
3490 {
3491 if (!global_options_set.x_arm_pic_data_is_text_relative)
3492 arm_pic_data_is_text_relative = 0;
3493 }
3494 else if (flag_pic
3495 && !arm_pic_data_is_text_relative
3496 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3497 /* When text & data segments don't have a fixed displacement, the
3498 intended use is with a single, read only, pic base register.
3499 Unless the user explicitly requested not to do that, set
3500 it. */
3501 target_flags |= MASK_SINGLE_PIC_BASE;
3502
3503 /* If stack checking is disabled, we can use r10 as the PIC register,
3504 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3505 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3506 {
3507 if (TARGET_VXWORKS_RTP)
3508 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3509 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3510 }
3511
3512 if (flag_pic && TARGET_VXWORKS_RTP)
3513 arm_pic_register = 9;
3514
3515 if (arm_pic_register_string != NULL)
3516 {
3517 int pic_register = decode_reg_name (arm_pic_register_string);
3518
3519 if (!flag_pic)
3520 warning (0, "-mpic-register= is useless without -fpic");
3521
3522 /* Prevent the user from choosing an obviously stupid PIC register. */
3523 else if (pic_register < 0 || call_used_regs[pic_register]
3524 || pic_register == HARD_FRAME_POINTER_REGNUM
3525 || pic_register == STACK_POINTER_REGNUM
3526 || pic_register >= PC_REGNUM
3527 || (TARGET_VXWORKS_RTP
3528 && (unsigned int) pic_register != arm_pic_register))
3529 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3530 else
3531 arm_pic_register = pic_register;
3532 }
3533
3534 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3535 if (fix_cm3_ldrd == 2)
3536 {
3537 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3538 fix_cm3_ldrd = 1;
3539 else
3540 fix_cm3_ldrd = 0;
3541 }
3542
3543 /* Hot/Cold partitioning is not currently supported, since we can't
3544 handle literal pool placement in that case. */
3545 if (flag_reorder_blocks_and_partition)
3546 {
3547 inform (input_location,
3548 "-freorder-blocks-and-partition not supported on this architecture");
3549 flag_reorder_blocks_and_partition = 0;
3550 flag_reorder_blocks = 1;
3551 }
3552
3553 if (flag_pic)
3554 /* Hoisting PIC address calculations more aggressively provides a small,
3555 but measurable, size reduction for PIC code. Therefore, we decrease
3556 the bar for unrestricted expression hoisting to the cost of PIC address
3557 calculation, which is 2 instructions. */
3558 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3559 global_options.x_param_values,
3560 global_options_set.x_param_values);
3561
3562 /* ARM EABI defaults to strict volatile bitfields. */
3563 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3564 && abi_version_at_least(2))
3565 flag_strict_volatile_bitfields = 1;
3566
 3567   /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3568 have deemed it beneficial (signified by setting
3569 prefetch.num_slots to 1 or more). */
3570 if (flag_prefetch_loop_arrays < 0
3571 && HAVE_prefetch
3572 && optimize >= 3
3573 && current_tune->prefetch.num_slots > 0)
3574 flag_prefetch_loop_arrays = 1;
3575
3576 /* Set up parameters to be used in prefetching algorithm. Do not
3577 override the defaults unless we are tuning for a core we have
3578 researched values for. */
3579 if (current_tune->prefetch.num_slots > 0)
3580 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3581 current_tune->prefetch.num_slots,
3582 global_options.x_param_values,
3583 global_options_set.x_param_values);
3584 if (current_tune->prefetch.l1_cache_line_size >= 0)
3585 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3586 current_tune->prefetch.l1_cache_line_size,
3587 global_options.x_param_values,
3588 global_options_set.x_param_values);
3589 if (current_tune->prefetch.l1_cache_size >= 0)
3590 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3591 current_tune->prefetch.l1_cache_size,
3592 global_options.x_param_values,
3593 global_options_set.x_param_values);
3594
 3595   /* Use Neon to perform 64-bit operations rather than core
3596 registers. */
3597 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3598 if (use_neon_for_64bits == 1)
3599 prefer_neon_for_64bits = true;
3600
3601 /* Use the alternative scheduling-pressure algorithm by default. */
3602 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3603 global_options.x_param_values,
3604 global_options_set.x_param_values);
3605
3606 /* Look through ready list and all of queue for instructions
3607 relevant for L2 auto-prefetcher. */
3608 int param_sched_autopref_queue_depth;
3609
3610 switch (current_tune->sched_autopref)
3611 {
3612 case tune_params::SCHED_AUTOPREF_OFF:
3613 param_sched_autopref_queue_depth = -1;
3614 break;
3615
3616 case tune_params::SCHED_AUTOPREF_RANK:
3617 param_sched_autopref_queue_depth = 0;
3618 break;
3619
3620 case tune_params::SCHED_AUTOPREF_FULL:
3621 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3622 break;
3623
3624 default:
3625 gcc_unreachable ();
3626 }
3627
3628 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3629 param_sched_autopref_queue_depth,
3630 global_options.x_param_values,
3631 global_options_set.x_param_values);
3632
3633 /* Currently, for slow flash data, we just disable literal pools. We also
 3634      disable them for pure-code. */
3635 if (target_slow_flash_data || target_pure_code)
3636 arm_disable_literal_pool = true;
3637
3638 if (use_cmse && !arm_arch_cmse)
3639 error ("target CPU does not support ARMv8-M Security Extensions");
3640
 3641   /* Disable scheduling fusion by default if the target is not an armv7
 3642      processor or does not prefer ldrd/strd. */
3643 if (flag_schedule_fusion == 2
3644 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3645 flag_schedule_fusion = 0;
3646
 3647   /* Need to remember initial options before they are overridden. */
3648 init_optimize = build_optimization_node (&global_options);
3649
3650 arm_option_override_internal (&global_options, &global_options_set);
3651 arm_option_check_internal (&global_options);
3652 arm_option_params_internal ();
3653
3654 /* Create the default target_options structure. */
3655 target_option_default_node = target_option_current_node
3656 = build_target_option_node (&global_options);
3657
3658 /* Register global variables with the garbage collector. */
3659 arm_add_gc_roots ();
3660
3661 /* Init initial mode for testing. */
3662 thumb_flipper = TARGET_THUMB;
3663 }
3664
3665 static void
3666 arm_add_gc_roots (void)
3667 {
3668 gcc_obstack_init(&minipool_obstack);
3669 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3670 }
3671 \f
3672 /* A table of known ARM exception types.
3673 For use with the interrupt function attribute. */
3674
3675 typedef struct
3676 {
3677 const char *const arg;
3678 const unsigned long return_value;
3679 }
3680 isr_attribute_arg;
3681
3682 static const isr_attribute_arg isr_attribute_args [] =
3683 {
3684 { "IRQ", ARM_FT_ISR },
3685 { "irq", ARM_FT_ISR },
3686 { "FIQ", ARM_FT_FIQ },
3687 { "fiq", ARM_FT_FIQ },
3688 { "ABORT", ARM_FT_ISR },
3689 { "abort", ARM_FT_ISR },
3690 { "ABORT", ARM_FT_ISR },
3691 { "abort", ARM_FT_ISR },
3692 { "UNDEF", ARM_FT_EXCEPTION },
3693 { "undef", ARM_FT_EXCEPTION },
3694 { "SWI", ARM_FT_EXCEPTION },
3695 { "swi", ARM_FT_EXCEPTION },
3696 { NULL, ARM_FT_NORMAL }
3697 };
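/* For example, a handler declared as
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   matches the "IRQ" entry above and is treated as ARM_FT_ISR, while an
   unrecognized string yields ARM_FT_UNKNOWN from arm_isr_value below.  */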
3698
3699 /* Returns the (interrupt) function type of the current
3700 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3701
3702 static unsigned long
3703 arm_isr_value (tree argument)
3704 {
3705 const isr_attribute_arg * ptr;
3706 const char * arg;
3707
3708 if (!arm_arch_notm)
3709 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3710
3711 /* No argument - default to IRQ. */
3712 if (argument == NULL_TREE)
3713 return ARM_FT_ISR;
3714
3715 /* Get the value of the argument. */
3716 if (TREE_VALUE (argument) == NULL_TREE
3717 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3718 return ARM_FT_UNKNOWN;
3719
3720 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3721
3722 /* Check it against the list of known arguments. */
3723 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3724 if (streq (arg, ptr->arg))
3725 return ptr->return_value;
3726
3727 /* An unrecognized interrupt type. */
3728 return ARM_FT_UNKNOWN;
3729 }
3730
3731 /* Computes the type of the current function. */
3732
3733 static unsigned long
3734 arm_compute_func_type (void)
3735 {
3736 unsigned long type = ARM_FT_UNKNOWN;
3737 tree a;
3738 tree attr;
3739
3740 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3741
3742 /* Decide if the current function is volatile. Such functions
3743 never return, and many memory cycles can be saved by not storing
3744 register values that will never be needed again. This optimization
3745 was added to speed up context switching in a kernel application. */
3746 if (optimize > 0
3747 && (TREE_NOTHROW (current_function_decl)
3748 || !(flag_unwind_tables
3749 || (flag_exceptions
3750 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3751 && TREE_THIS_VOLATILE (current_function_decl))
3752 type |= ARM_FT_VOLATILE;
3753
3754 if (cfun->static_chain_decl != NULL)
3755 type |= ARM_FT_NESTED;
3756
3757 attr = DECL_ATTRIBUTES (current_function_decl);
3758
3759 a = lookup_attribute ("naked", attr);
3760 if (a != NULL_TREE)
3761 type |= ARM_FT_NAKED;
3762
3763 a = lookup_attribute ("isr", attr);
3764 if (a == NULL_TREE)
3765 a = lookup_attribute ("interrupt", attr);
3766
3767 if (a == NULL_TREE)
3768 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3769 else
3770 type |= arm_isr_value (TREE_VALUE (a));
3771
3772 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3773 type |= ARM_FT_CMSE_ENTRY;
3774
3775 return type;
3776 }
3777
3778 /* Returns the type of the current function. */
3779
3780 unsigned long
3781 arm_current_func_type (void)
3782 {
3783 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3784 cfun->machine->func_type = arm_compute_func_type ();
3785
3786 return cfun->machine->func_type;
3787 }
3788
3789 bool
3790 arm_allocate_stack_slots_for_args (void)
3791 {
3792 /* Naked functions should not allocate stack slots for arguments. */
3793 return !IS_NAKED (arm_current_func_type ());
3794 }
3795
3796 static bool
3797 arm_warn_func_return (tree decl)
3798 {
3799 /* Naked functions are implemented entirely in assembly, including the
3800 return sequence, so suppress warnings about this. */
3801 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3802 }
3803
3804 \f
3805 /* Output assembler code for a block containing the constant parts
3806 of a trampoline, leaving space for the variable parts.
3807
3808 On the ARM, (if r8 is the static chain regnum, and remembering that
3809 referencing pc adds an offset of 8) the trampoline looks like:
3810 ldr r8, [pc, #0]
3811 ldr pc, [pc]
3812 .word static chain value
3813 .word function's address
3814 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3815
3816 static void
3817 arm_asm_trampoline_template (FILE *f)
3818 {
3819 fprintf (f, "\t.syntax unified\n");
3820
3821 if (TARGET_ARM)
3822 {
3823 fprintf (f, "\t.arm\n");
3824 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3825 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3826 }
3827 else if (TARGET_THUMB2)
3828 {
3829 fprintf (f, "\t.thumb\n");
3830 /* The Thumb-2 trampoline is similar to the arm implementation.
3831 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3832 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3833 STATIC_CHAIN_REGNUM, PC_REGNUM);
3834 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3835 }
3836 else
3837 {
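      /* Thumb-1 cannot load the pc directly from memory, so the stub
	 pushes r0 and r1, loads the static chain value and the target
	 address pc-relatively, overwrites the saved r1 slot with the
	 target address and then pops it straight into the pc.  */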
3838 ASM_OUTPUT_ALIGN (f, 2);
3839 fprintf (f, "\t.code\t16\n");
3840 fprintf (f, ".Ltrampoline_start:\n");
3841 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3842 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3843 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3844 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3845 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3846 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3847 }
3848 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3849 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3850 }
3851
3852 /* Emit RTL insns to initialize the variable parts of a trampoline. */
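/* On 32-bit (ARM and Thumb-2) targets the two variable words live at
   offsets 8 and 12 of the template emitted above; the longer Thumb-1
   stub places them at offsets 12 and 16 instead, matching the
   adjust_address offsets used below.  */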
3853
3854 static void
3855 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3856 {
3857 rtx fnaddr, mem, a_tramp;
3858
3859 emit_block_move (m_tramp, assemble_trampoline_template (),
3860 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3861
3862 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3863 emit_move_insn (mem, chain_value);
3864
3865 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3866 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3867 emit_move_insn (mem, fnaddr);
3868
3869 a_tramp = XEXP (m_tramp, 0);
3870 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3871 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3872 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3873 }
3874
3875 /* Thumb trampolines should be entered in thumb mode, so set
3876 the bottom bit of the address. */
3877
3878 static rtx
3879 arm_trampoline_adjust_address (rtx addr)
3880 {
3881 if (TARGET_THUMB)
3882 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3883 NULL, 0, OPTAB_LIB_WIDEN);
3884 return addr;
3885 }
3886 \f
3887 /* Return 1 if it is possible to return using a single instruction.
3888 If SIBLING is non-null, this is a test for a return before a sibling
3889 call. SIBLING is the call insn, so we can examine its register usage. */
3890
3891 int
3892 use_return_insn (int iscond, rtx sibling)
3893 {
3894 int regno;
3895 unsigned int func_type;
3896 unsigned long saved_int_regs;
3897 unsigned HOST_WIDE_INT stack_adjust;
3898 arm_stack_offsets *offsets;
3899
3900 /* Never use a return instruction before reload has run. */
3901 if (!reload_completed)
3902 return 0;
3903
3904 func_type = arm_current_func_type ();
3905
3906 /* Naked, volatile and stack alignment functions need special
3907 consideration. */
3908 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3909 return 0;
3910
3911 /* So do interrupt functions that use the frame pointer and Thumb
3912 interrupt functions. */
3913 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3914 return 0;
3915
3916 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3917 && !optimize_function_for_size_p (cfun))
3918 return 0;
3919
3920 offsets = arm_get_frame_offsets ();
3921 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3922
3923 /* As do variadic functions. */
3924 if (crtl->args.pretend_args_size
3925 || cfun->machine->uses_anonymous_args
3926 /* Or if the function calls __builtin_eh_return () */
3927 || crtl->calls_eh_return
3928 /* Or if the function calls alloca */
3929 || cfun->calls_alloca
3930 /* Or if there is a stack adjustment. However, if the stack pointer
3931 is saved on the stack, we can use a pre-incrementing stack load. */
3932 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3933 && stack_adjust == 4))
3934 /* Or if the static chain register was saved above the frame, under the
3935 assumption that the stack pointer isn't saved on the stack. */
3936 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3937 && arm_compute_static_chain_stack_bytes() != 0))
3938 return 0;
3939
3940 saved_int_regs = offsets->saved_regs_mask;
3941
3942 /* Unfortunately, the insn
3943
3944 ldmib sp, {..., sp, ...}
3945
3946 triggers a bug on most SA-110 based devices, such that the stack
3947 pointer won't be correctly restored if the instruction takes a
3948 page fault. We work around this problem by popping r3 along with
3949 the other registers, since that is never slower than executing
3950 another instruction.
3951
3952 We test for !arm_arch5 here, because code for any architecture
3953 less than this could potentially be run on one of the buggy
3954 chips. */
3955 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3956 {
3957 /* Validate that r3 is a call-clobbered register (always true in
3958 the default abi) ... */
3959 if (!call_used_regs[3])
3960 return 0;
3961
3962 /* ... that it isn't being used for a return value ... */
3963 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3964 return 0;
3965
3966 /* ... or for a tail-call argument ... */
3967 if (sibling)
3968 {
3969 gcc_assert (CALL_P (sibling));
3970
3971 if (find_regno_fusage (sibling, USE, 3))
3972 return 0;
3973 }
3974
3975 /* ... and that there are no call-saved registers in r0-r2
3976 (always true in the default ABI). */
3977 if (saved_int_regs & 0x7)
3978 return 0;
3979 }
3980
3981 /* Can't be done if interworking with Thumb, and any registers have been
3982 stacked. */
3983 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3984 return 0;
3985
3986 /* On StrongARM, conditional returns are expensive if they aren't
3987 taken and multiple registers have been stacked. */
3988 if (iscond && arm_tune_strongarm)
3989 {
3990 /* Conditional return when just the LR is stored is a simple
3991 conditional-load instruction, that's not expensive. */
3992 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3993 return 0;
3994
3995 if (flag_pic
3996 && arm_pic_register != INVALID_REGNUM
3997 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3998 return 0;
3999 }
4000
 4001   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4002 several instructions if anything needs to be popped. */
4003 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4004 return 0;
4005
4006 /* If there are saved registers but the LR isn't saved, then we need
4007 two instructions for the return. */
4008 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4009 return 0;
4010
4011 /* Can't be done if any of the VFP regs are pushed,
4012 since this also requires an insn. */
4013 if (TARGET_HARD_FLOAT)
4014 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4015 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4016 return 0;
4017
4018 if (TARGET_REALLY_IWMMXT)
4019 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4020 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4021 return 0;
4022
4023 return 1;
4024 }
4025
4026 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4027 shrink-wrapping if possible. This is the case if we need to emit a
4028 prologue, which we can test by looking at the offsets. */
4029 bool
4030 use_simple_return_p (void)
4031 {
4032 arm_stack_offsets *offsets;
4033
4034 offsets = arm_get_frame_offsets ();
4035 return offsets->outgoing_args != 0;
4036 }
4037
4038 /* Return TRUE if int I is a valid immediate ARM constant. */
4039
4040 int
4041 const_ok_for_arm (HOST_WIDE_INT i)
4042 {
4043 int lowbit;
4044
4045 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4046 be all zero, or all one. */
4047 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4048 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4049 != ((~(unsigned HOST_WIDE_INT) 0)
4050 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4051 return FALSE;
4052
4053 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4054
4055 /* Fast return for 0 and small values. We must do this for zero, since
4056 the code below can't handle that one case. */
4057 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4058 return TRUE;
4059
4060 /* Get the number of trailing zeros. */
4061 lowbit = ffs((int) i) - 1;
4062
4063 /* Only even shifts are allowed in ARM mode so round down to the
4064 nearest even number. */
4065 if (TARGET_ARM)
4066 lowbit &= ~1;
4067
4068 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4069 return TRUE;
4070
4071 if (TARGET_ARM)
4072 {
4073 /* Allow rotated constants in ARM mode. */
4074 if (lowbit <= 4
4075 && ((i & ~0xc000003f) == 0
4076 || (i & ~0xf000000f) == 0
4077 || (i & ~0xfc000003) == 0))
4078 return TRUE;
4079 }
4080 else
4081 {
4082 HOST_WIDE_INT v;
4083
4084 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4085 v = i & 0xff;
4086 v |= v << 16;
4087 if (i == v || i == (v | (v << 8)))
4088 return TRUE;
4089
4090 /* Allow repeated pattern 0xXY00XY00. */
4091 v = i & 0xff00;
4092 v |= v << 16;
4093 if (i == v)
4094 return TRUE;
4095 }
4096
4097 return FALSE;
4098 }
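/* Some worked examples for const_ok_for_arm: in ARM state 0x000000ff,
   0x0000ff00 and 0xf000000f are all valid immediates (an 8-bit value
   rotated right by an even amount), whereas 0xff0000ff is not and needs
   two instructions.  In the non-ARM (Thumb-2) case the replicated
   patterns 0x00120012, 0x12001200 and 0x12121212 are also accepted.  */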
4099
4100 /* Return true if I is a valid constant for the operation CODE. */
4101 int
4102 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4103 {
4104 if (const_ok_for_arm (i))
4105 return 1;
4106
4107 switch (code)
4108 {
4109 case SET:
4110 /* See if we can use movw. */
4111 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4112 return 1;
4113 else
4114 /* Otherwise, try mvn. */
4115 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4116
4117 case PLUS:
4118 /* See if we can use addw or subw. */
4119 if (TARGET_THUMB2
4120 && ((i & 0xfffff000) == 0
4121 || ((-i) & 0xfffff000) == 0))
4122 return 1;
4123 /* Fall through. */
4124 case COMPARE:
4125 case EQ:
4126 case NE:
4127 case GT:
4128 case LE:
4129 case LT:
4130 case GE:
4131 case GEU:
4132 case LTU:
4133 case GTU:
4134 case LEU:
4135 case UNORDERED:
4136 case ORDERED:
4137 case UNEQ:
4138 case UNGE:
4139 case UNLT:
4140 case UNGT:
4141 case UNLE:
4142 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4143
4144 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4145 case XOR:
4146 return 0;
4147
4148 case IOR:
4149 if (TARGET_THUMB2)
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4151 return 0;
4152
4153 case AND:
4154 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4155
4156 default:
4157 gcc_unreachable ();
4158 }
4159 }
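/* For instance, (plus reg -42) is accepted above because the negated
   value 42 is a valid immediate and the add can be emitted as a sub,
   while (and reg 0xffffff00) is accepted because the inverted constant
   0xff is valid and the operation can be emitted as a bic.  */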
4160
4161 /* Return true if I is a valid di mode constant for the operation CODE. */
4162 int
4163 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4164 {
4165 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4166 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4167 rtx hi = GEN_INT (hi_val);
4168 rtx lo = GEN_INT (lo_val);
4169
4170 if (TARGET_THUMB1)
4171 return 0;
4172
4173 switch (code)
4174 {
4175 case AND:
4176 case IOR:
4177 case XOR:
4178 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4179 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4180 case PLUS:
4181 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4182
4183 default:
4184 return 0;
4185 }
4186 }
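/* E.g. a DImode AND with 0xffffffff00000000 is accepted: the high word
   of the mask is all ones (so that half is left untouched) and the low
   word is zero, which is trivially valid for the 32-bit AND that clears
   the other half.  */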
4187
4188 /* Emit a sequence of insns to handle a large constant.
4189 CODE is the code of the operation required, it can be any of SET, PLUS,
4190 IOR, AND, XOR, MINUS;
4191 MODE is the mode in which the operation is being performed;
4192 VAL is the integer to operate on;
4193 SOURCE is the other operand (a register, or a null-pointer for SET);
4194 SUBTARGETS means it is safe to create scratch registers if that will
4195 either produce a simpler sequence, or we will want to cse the values.
4196 Return value is the number of insns emitted. */
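/* For example, on ARM without MOVT a SET of 0xff0000ff is synthesized as
   mov rD, #0xff000000 followed by orr rD, rD, #0xff (two insns), while a
   PLUS of -1000 collapses to a single sub of #1000.  */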
4197
4198 /* ??? Tweak this for thumb2. */
4199 int
4200 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4201 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4202 {
4203 rtx cond;
4204
4205 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4206 cond = COND_EXEC_TEST (PATTERN (insn));
4207 else
4208 cond = NULL_RTX;
4209
4210 if (subtargets || code == SET
4211 || (REG_P (target) && REG_P (source)
4212 && REGNO (target) != REGNO (source)))
4213 {
4214 /* After arm_reorg has been called, we can't fix up expensive
4215 constants by pushing them into memory so we must synthesize
4216 them in-line, regardless of the cost. This is only likely to
4217 be more costly on chips that have load delay slots and we are
4218 compiling without running the scheduler (so no splitting
4219 occurred before the final instruction emission).
4220
4221 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4222 */
4223 if (!cfun->machine->after_arm_reorg
4224 && !cond
4225 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4226 1, 0)
4227 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4228 + (code != SET))))
4229 {
4230 if (code == SET)
4231 {
4232 /* Currently SET is the only monadic value for CODE, all
 4233 		 the rest are dyadic. */
4234 if (TARGET_USE_MOVT)
4235 arm_emit_movpair (target, GEN_INT (val));
4236 else
4237 emit_set_insn (target, GEN_INT (val));
4238
4239 return 1;
4240 }
4241 else
4242 {
4243 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4244
4245 if (TARGET_USE_MOVT)
4246 arm_emit_movpair (temp, GEN_INT (val));
4247 else
4248 emit_set_insn (temp, GEN_INT (val));
4249
4250 /* For MINUS, the value is subtracted from, since we never
4251 have subtraction of a constant. */
4252 if (code == MINUS)
4253 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4254 else
4255 emit_set_insn (target,
4256 gen_rtx_fmt_ee (code, mode, source, temp));
4257 return 2;
4258 }
4259 }
4260 }
4261
4262 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4263 1);
4264 }
4265
 4266 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
 4267    ARM/THUMB2 immediates and add up to VAL.
 4268    The function return value gives the number of insns required. */
4269 static int
4270 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4271 struct four_ints *return_sequence)
4272 {
4273 int best_consecutive_zeros = 0;
4274 int i;
4275 int best_start = 0;
4276 int insns1, insns2;
4277 struct four_ints tmp_sequence;
4278
4279 /* If we aren't targeting ARM, the best place to start is always at
4280 the bottom, otherwise look more closely. */
4281 if (TARGET_ARM)
4282 {
4283 for (i = 0; i < 32; i += 2)
4284 {
4285 int consecutive_zeros = 0;
4286
4287 if (!(val & (3 << i)))
4288 {
4289 while ((i < 32) && !(val & (3 << i)))
4290 {
4291 consecutive_zeros += 2;
4292 i += 2;
4293 }
4294 if (consecutive_zeros > best_consecutive_zeros)
4295 {
4296 best_consecutive_zeros = consecutive_zeros;
4297 best_start = i - consecutive_zeros;
4298 }
4299 i -= 2;
4300 }
4301 }
4302 }
4303
4304 /* So long as it won't require any more insns to do so, it's
4305 desirable to emit a small constant (in bits 0...9) in the last
4306 insn. This way there is more chance that it can be combined with
4307 a later addressing insn to form a pre-indexed load or store
4308 operation. Consider:
4309
4310 *((volatile int *)0xe0000100) = 1;
4311 *((volatile int *)0xe0000110) = 2;
4312
4313 We want this to wind up as:
4314
4315 mov rA, #0xe0000000
4316 mov rB, #1
4317 str rB, [rA, #0x100]
4318 mov rB, #2
4319 str rB, [rA, #0x110]
4320
4321 rather than having to synthesize both large constants from scratch.
4322
4323 Therefore, we calculate how many insns would be required to emit
4324 the constant starting from `best_start', and also starting from
4325 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4326 yield a shorter sequence, we may as well use zero. */
4327 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4328 if (best_start != 0
4329 && ((HOST_WIDE_INT_1U << best_start) < val))
4330 {
4331 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4332 if (insns2 <= insns1)
4333 {
4334 *return_sequence = tmp_sequence;
4335 insns1 = insns2;
4336 }
4337 }
4338
4339 return insns1;
4340 }
4341
4342 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4343 static int
4344 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4345 struct four_ints *return_sequence, int i)
4346 {
4347 int remainder = val & 0xffffffff;
4348 int insns = 0;
4349
4350 /* Try and find a way of doing the job in either two or three
4351 instructions.
4352
4353 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4354 location. We start at position I. This may be the MSB, or
 4355      optimal_immediate_sequence may have positioned it at the largest block
4356 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4357 wrapping around to the top of the word when we drop off the bottom.
4358 In the worst case this code should produce no more than four insns.
4359
4360 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4361 constants, shifted to any arbitrary location. We should always start
4362 at the MSB. */
4363 do
4364 {
4365 int end;
4366 unsigned int b1, b2, b3, b4;
4367 unsigned HOST_WIDE_INT result;
4368 int loc;
4369
4370 gcc_assert (insns < 4);
4371
4372 if (i <= 0)
4373 i += 32;
4374
4375 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4376 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4377 {
4378 loc = i;
4379 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4380 /* We can use addw/subw for the last 12 bits. */
4381 result = remainder;
4382 else
4383 {
4384 /* Use an 8-bit shifted/rotated immediate. */
4385 end = i - 8;
4386 if (end < 0)
4387 end += 32;
4388 result = remainder & ((0x0ff << end)
4389 | ((i < end) ? (0xff >> (32 - end))
4390 : 0));
4391 i -= 8;
4392 }
4393 }
4394 else
4395 {
4396 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4397 arbitrary shifts. */
4398 i -= TARGET_ARM ? 2 : 1;
4399 continue;
4400 }
4401
4402 /* Next, see if we can do a better job with a thumb2 replicated
4403 constant.
4404
4405 We do it this way around to catch the cases like 0x01F001E0 where
4406 two 8-bit immediates would work, but a replicated constant would
4407 make it worse.
4408
4409 TODO: 16-bit constants that don't clear all the bits, but still win.
4410 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4411 if (TARGET_THUMB2)
4412 {
4413 b1 = (remainder & 0xff000000) >> 24;
4414 b2 = (remainder & 0x00ff0000) >> 16;
4415 b3 = (remainder & 0x0000ff00) >> 8;
4416 b4 = remainder & 0xff;
4417
4418 if (loc > 24)
4419 {
4420 /* The 8-bit immediate already found clears b1 (and maybe b2),
4421 but must leave b3 and b4 alone. */
4422
4423 /* First try to find a 32-bit replicated constant that clears
4424 almost everything. We can assume that we can't do it in one,
4425 or else we wouldn't be here. */
4426 unsigned int tmp = b1 & b2 & b3 & b4;
4427 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4428 + (tmp << 24);
4429 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4430 + (tmp == b3) + (tmp == b4);
4431 if (tmp
4432 && (matching_bytes >= 3
4433 || (matching_bytes == 2
4434 && const_ok_for_op (remainder & ~tmp2, code))))
4435 {
4436 /* At least 3 of the bytes match, and the fourth has at
4437 least as many bits set, or two of the bytes match
4438 and it will only require one more insn to finish. */
4439 result = tmp2;
4440 i = tmp != b1 ? 32
4441 : tmp != b2 ? 24
4442 : tmp != b3 ? 16
4443 : 8;
4444 }
4445
4446 /* Second, try to find a 16-bit replicated constant that can
4447 leave three of the bytes clear. If b2 or b4 is already
4448 zero, then we can. If the 8-bit from above would not
4449 clear b2 anyway, then we still win. */
4450 else if (b1 == b3 && (!b2 || !b4
4451 || (remainder & 0x00ff0000 & ~result)))
4452 {
4453 result = remainder & 0xff00ff00;
4454 i = 24;
4455 }
4456 }
4457 else if (loc > 16)
4458 {
4459 /* The 8-bit immediate already found clears b2 (and maybe b3)
 4460 	     and we don't get here unless b1 is already clear, but it will
4461 leave b4 unchanged. */
4462
4463 /* If we can clear b2 and b4 at once, then we win, since the
4464 8-bits couldn't possibly reach that far. */
4465 if (b2 == b4)
4466 {
4467 result = remainder & 0x00ff00ff;
4468 i = 16;
4469 }
4470 }
4471 }
4472
4473 return_sequence->i[insns++] = result;
4474 remainder &= ~result;
4475
4476 if (code == SET || code == MINUS)
4477 code = PLUS;
4478 }
4479 while (remainder);
4480
4481 return insns;
4482 }
4483
4484 /* Emit an instruction with the indicated PATTERN. If COND is
4485 non-NULL, conditionalize the execution of the instruction on COND
4486 being true. */
4487
4488 static void
4489 emit_constant_insn (rtx cond, rtx pattern)
4490 {
4491 if (cond)
4492 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4493 emit_insn (pattern);
4494 }
4495
4496 /* As above, but extra parameter GENERATE which, if clear, suppresses
4497 RTL generation. */
4498
4499 static int
4500 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4501 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4502 int subtargets, int generate)
4503 {
4504 int can_invert = 0;
4505 int can_negate = 0;
4506 int final_invert = 0;
4507 int i;
4508 int set_sign_bit_copies = 0;
4509 int clear_sign_bit_copies = 0;
4510 int clear_zero_bit_copies = 0;
4511 int set_zero_bit_copies = 0;
4512 int insns = 0, neg_insns, inv_insns;
4513 unsigned HOST_WIDE_INT temp1, temp2;
4514 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4515 struct four_ints *immediates;
4516 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4517
4518 /* Find out which operations are safe for a given CODE. Also do a quick
4519 check for degenerate cases; these can occur when DImode operations
4520 are split. */
4521 switch (code)
4522 {
4523 case SET:
4524 can_invert = 1;
4525 break;
4526
4527 case PLUS:
4528 can_negate = 1;
4529 break;
4530
4531 case IOR:
4532 if (remainder == 0xffffffff)
4533 {
4534 if (generate)
4535 emit_constant_insn (cond,
4536 gen_rtx_SET (target,
4537 GEN_INT (ARM_SIGN_EXTEND (val))));
4538 return 1;
4539 }
4540
4541 if (remainder == 0)
4542 {
4543 if (reload_completed && rtx_equal_p (target, source))
4544 return 0;
4545
4546 if (generate)
4547 emit_constant_insn (cond, gen_rtx_SET (target, source));
4548 return 1;
4549 }
4550 break;
4551
4552 case AND:
4553 if (remainder == 0)
4554 {
4555 if (generate)
4556 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4557 return 1;
4558 }
4559 if (remainder == 0xffffffff)
4560 {
4561 if (reload_completed && rtx_equal_p (target, source))
4562 return 0;
4563 if (generate)
4564 emit_constant_insn (cond, gen_rtx_SET (target, source));
4565 return 1;
4566 }
4567 can_invert = 1;
4568 break;
4569
4570 case XOR:
4571 if (remainder == 0)
4572 {
4573 if (reload_completed && rtx_equal_p (target, source))
4574 return 0;
4575 if (generate)
4576 emit_constant_insn (cond, gen_rtx_SET (target, source));
4577 return 1;
4578 }
4579
4580 if (remainder == 0xffffffff)
4581 {
4582 if (generate)
4583 emit_constant_insn (cond,
4584 gen_rtx_SET (target,
4585 gen_rtx_NOT (mode, source)));
4586 return 1;
4587 }
4588 final_invert = 1;
4589 break;
4590
4591 case MINUS:
4592 /* We treat MINUS as (val - source), since (source - val) is always
4593 passed as (source + (-val)). */
4594 if (remainder == 0)
4595 {
4596 if (generate)
4597 emit_constant_insn (cond,
4598 gen_rtx_SET (target,
4599 gen_rtx_NEG (mode, source)));
4600 return 1;
4601 }
4602 if (const_ok_for_arm (val))
4603 {
4604 if (generate)
4605 emit_constant_insn (cond,
4606 gen_rtx_SET (target,
4607 gen_rtx_MINUS (mode, GEN_INT (val),
4608 source)));
4609 return 1;
4610 }
4611
4612 break;
4613
4614 default:
4615 gcc_unreachable ();
4616 }
4617
4618 /* If we can do it in one insn get out quickly. */
4619 if (const_ok_for_op (val, code))
4620 {
4621 if (generate)
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (target,
4624 (source
4625 ? gen_rtx_fmt_ee (code, mode, source,
4626 GEN_INT (val))
4627 : GEN_INT (val))));
4628 return 1;
4629 }
4630
4631 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4632 insn. */
4633 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4634 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4635 {
4636 if (generate)
4637 {
4638 if (mode == SImode && i == 16)
4639 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4640 smaller insn. */
4641 emit_constant_insn (cond,
4642 gen_zero_extendhisi2
4643 (target, gen_lowpart (HImode, source)));
4644 else
4645 /* Extz only supports SImode, but we can coerce the operands
4646 into that mode. */
4647 emit_constant_insn (cond,
4648 gen_extzv_t2 (gen_lowpart (SImode, target),
4649 gen_lowpart (SImode, source),
4650 GEN_INT (i), const0_rtx));
4651 }
4652
4653 return 1;
4654 }
4655
4656 /* Calculate a few attributes that may be useful for specific
4657 optimizations. */
4658 /* Count number of leading zeros. */
4659 for (i = 31; i >= 0; i--)
4660 {
4661 if ((remainder & (1 << i)) == 0)
4662 clear_sign_bit_copies++;
4663 else
4664 break;
4665 }
4666
4667 /* Count number of leading 1's. */
4668 for (i = 31; i >= 0; i--)
4669 {
4670 if ((remainder & (1 << i)) != 0)
4671 set_sign_bit_copies++;
4672 else
4673 break;
4674 }
4675
 4676   /* Count number of trailing zeros. */
4677 for (i = 0; i <= 31; i++)
4678 {
4679 if ((remainder & (1 << i)) == 0)
4680 clear_zero_bit_copies++;
4681 else
4682 break;
4683 }
4684
4685 /* Count number of trailing 1's. */
4686 for (i = 0; i <= 31; i++)
4687 {
4688 if ((remainder & (1 << i)) != 0)
4689 set_zero_bit_copies++;
4690 else
4691 break;
4692 }
4693
4694 switch (code)
4695 {
4696 case SET:
4697 /* See if we can do this by sign_extending a constant that is known
 4698 	 to be negative.  This is a good way of doing it, since the shift
4699 may well merge into a subsequent insn. */
4700 if (set_sign_bit_copies > 1)
4701 {
4702 if (const_ok_for_arm
4703 (temp1 = ARM_SIGN_EXTEND (remainder
4704 << (set_sign_bit_copies - 1))))
4705 {
4706 if (generate)
4707 {
4708 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4709 emit_constant_insn (cond,
4710 gen_rtx_SET (new_src, GEN_INT (temp1)));
4711 emit_constant_insn (cond,
4712 gen_ashrsi3 (target, new_src,
4713 GEN_INT (set_sign_bit_copies - 1)));
4714 }
4715 return 2;
4716 }
4717 /* For an inverted constant, we will need to set the low bits,
4718 these will be shifted out of harm's way. */
4719 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4720 if (const_ok_for_arm (~temp1))
4721 {
4722 if (generate)
4723 {
4724 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4725 emit_constant_insn (cond,
4726 gen_rtx_SET (new_src, GEN_INT (temp1)));
4727 emit_constant_insn (cond,
4728 gen_ashrsi3 (target, new_src,
4729 GEN_INT (set_sign_bit_copies - 1)));
4730 }
4731 return 2;
4732 }
4733 }
4734
4735 /* See if we can calculate the value as the difference between two
4736 valid immediates. */
4737 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4738 {
4739 int topshift = clear_sign_bit_copies & ~1;
4740
4741 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4742 & (0xff000000 >> topshift));
4743
4744 /* If temp1 is zero, then that means the 9 most significant
4745 bits of remainder were 1 and we've caused it to overflow.
4746 When topshift is 0 we don't need to do anything since we
4747 can borrow from 'bit 32'. */
4748 if (temp1 == 0 && topshift != 0)
4749 temp1 = 0x80000000 >> (topshift - 1);
4750
4751 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4752
4753 if (const_ok_for_arm (temp2))
4754 {
4755 if (generate)
4756 {
4757 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4758 emit_constant_insn (cond,
4759 gen_rtx_SET (new_src, GEN_INT (temp1)));
4760 emit_constant_insn (cond,
4761 gen_addsi3 (target, new_src,
4762 GEN_INT (-temp2)));
4763 }
4764
4765 return 2;
4766 }
4767 }
4768
4769 /* See if we can generate this by setting the bottom (or the top)
4770 16 bits, and then shifting these into the other half of the
 4771 	 word.  We only look for the simplest cases; to do more would cost
4772 too much. Be careful, however, not to generate this when the
4773 alternative would take fewer insns. */
4774 if (val & 0xffff0000)
4775 {
4776 temp1 = remainder & 0xffff0000;
4777 temp2 = remainder & 0x0000ffff;
4778
4779 /* Overlaps outside this range are best done using other methods. */
4780 for (i = 9; i < 24; i++)
4781 {
4782 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4783 && !const_ok_for_arm (temp2))
4784 {
4785 rtx new_src = (subtargets
4786 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4787 : target);
4788 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4789 source, subtargets, generate);
4790 source = new_src;
4791 if (generate)
4792 emit_constant_insn
4793 (cond,
4794 gen_rtx_SET
4795 (target,
4796 gen_rtx_IOR (mode,
4797 gen_rtx_ASHIFT (mode, source,
4798 GEN_INT (i)),
4799 source)));
4800 return insns + 1;
4801 }
4802 }
4803
4804 /* Don't duplicate cases already considered. */
4805 for (i = 17; i < 24; i++)
4806 {
4807 if (((temp1 | (temp1 >> i)) == remainder)
4808 && !const_ok_for_arm (temp1))
4809 {
4810 rtx new_src = (subtargets
4811 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4812 : target);
4813 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4814 source, subtargets, generate);
4815 source = new_src;
4816 if (generate)
4817 emit_constant_insn
4818 (cond,
4819 gen_rtx_SET (target,
4820 gen_rtx_IOR
4821 (mode,
4822 gen_rtx_LSHIFTRT (mode, source,
4823 GEN_INT (i)),
4824 source)));
4825 return insns + 1;
4826 }
4827 }
4828 }
4829 break;
4830
4831 case IOR:
4832 case XOR:
4833 /* If we have IOR or XOR, and the constant can be loaded in a
4834 single instruction, and we can find a temporary to put it in,
4835 then this can be done in two instructions instead of 3-4. */
4836 if (subtargets
 4837 	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
4838 || (reload_completed && !reg_mentioned_p (target, source)))
4839 {
4840 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4841 {
4842 if (generate)
4843 {
4844 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4845
4846 emit_constant_insn (cond,
4847 gen_rtx_SET (sub, GEN_INT (val)));
4848 emit_constant_insn (cond,
4849 gen_rtx_SET (target,
4850 gen_rtx_fmt_ee (code, mode,
4851 source, sub)));
4852 }
4853 return 2;
4854 }
4855 }
4856
4857 if (code == XOR)
4858 break;
4859
4860 /* Convert.
 4861 	 x = y | constant (which is composed of set_sign_bit_copies leading 1s
 4862 	 followed by 0s, e.g. 0xfff00000)
4863 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4864
4865 This can be done in 2 instructions by using shifts with mov or mvn.
4866 e.g. for
4867 x = x | 0xfff00000;
4868 we generate.
4869 mvn r0, r0, asl #12
4870 mvn r0, r0, lsr #12 */
4871 if (set_sign_bit_copies > 8
4872 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4873 {
4874 if (generate)
4875 {
4876 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4877 rtx shift = GEN_INT (set_sign_bit_copies);
4878
4879 emit_constant_insn
4880 (cond,
4881 gen_rtx_SET (sub,
4882 gen_rtx_NOT (mode,
4883 gen_rtx_ASHIFT (mode,
4884 source,
4885 shift))));
4886 emit_constant_insn
4887 (cond,
4888 gen_rtx_SET (target,
4889 gen_rtx_NOT (mode,
4890 gen_rtx_LSHIFTRT (mode, sub,
4891 shift))));
4892 }
4893 return 2;
4894 }
4895
4896 /* Convert
4897 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4898 to
4899 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4900
 4901 	 E.g. for r0 = r0 | 0xfff
4902 mvn r0, r0, lsr #12
4903 mvn r0, r0, asl #12
4904
4905 */
4906 if (set_zero_bit_copies > 8
4907 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4908 {
4909 if (generate)
4910 {
4911 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4912 rtx shift = GEN_INT (set_zero_bit_copies);
4913
4914 emit_constant_insn
4915 (cond,
4916 gen_rtx_SET (sub,
4917 gen_rtx_NOT (mode,
4918 gen_rtx_LSHIFTRT (mode,
4919 source,
4920 shift))));
4921 emit_constant_insn
4922 (cond,
4923 gen_rtx_SET (target,
4924 gen_rtx_NOT (mode,
4925 gen_rtx_ASHIFT (mode, sub,
4926 shift))));
4927 }
4928 return 2;
4929 }
4930
4931 /* This will never be reached for Thumb2 because orn is a valid
4932 instruction. This is for Thumb1 and the ARM 32 bit cases.
4933
4934 x = y | constant (such that ~constant is a valid constant)
4935 Transform this to
4936 x = ~(~y & ~constant).
4937 */
4938 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4939 {
4940 if (generate)
4941 {
4942 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4943 emit_constant_insn (cond,
4944 gen_rtx_SET (sub,
4945 gen_rtx_NOT (mode, source)));
4946 source = sub;
4947 if (subtargets)
4948 sub = gen_reg_rtx (mode);
4949 emit_constant_insn (cond,
4950 gen_rtx_SET (sub,
4951 gen_rtx_AND (mode, source,
4952 GEN_INT (temp1))));
4953 emit_constant_insn (cond,
4954 gen_rtx_SET (target,
4955 gen_rtx_NOT (mode, sub)));
4956 }
4957 return 3;
4958 }
4959 break;
4960
4961 case AND:
 4962       /* See if two shifts will do 2 or more insns' worth of work. */
4963 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4964 {
4965 HOST_WIDE_INT shift_mask = ((0xffffffff
4966 << (32 - clear_sign_bit_copies))
4967 & 0xffffffff);
4968
4969 if ((remainder | shift_mask) != 0xffffffff)
4970 {
4971 HOST_WIDE_INT new_val
4972 = ARM_SIGN_EXTEND (remainder | shift_mask);
4973
4974 if (generate)
4975 {
4976 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4977 insns = arm_gen_constant (AND, SImode, cond, new_val,
4978 new_src, source, subtargets, 1);
4979 source = new_src;
4980 }
4981 else
4982 {
4983 rtx targ = subtargets ? NULL_RTX : target;
4984 insns = arm_gen_constant (AND, mode, cond, new_val,
4985 targ, source, subtargets, 0);
4986 }
4987 }
4988
4989 if (generate)
4990 {
4991 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4992 rtx shift = GEN_INT (clear_sign_bit_copies);
4993
4994 emit_insn (gen_ashlsi3 (new_src, source, shift));
4995 emit_insn (gen_lshrsi3 (target, new_src, shift));
4996 }
4997
4998 return insns + 2;
4999 }
5000
5001 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5002 {
5003 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5004
5005 if ((remainder | shift_mask) != 0xffffffff)
5006 {
5007 HOST_WIDE_INT new_val
5008 = ARM_SIGN_EXTEND (remainder | shift_mask);
5009 if (generate)
5010 {
5011 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5012
5013 insns = arm_gen_constant (AND, mode, cond, new_val,
5014 new_src, source, subtargets, 1);
5015 source = new_src;
5016 }
5017 else
5018 {
5019 rtx targ = subtargets ? NULL_RTX : target;
5020
5021 insns = arm_gen_constant (AND, mode, cond, new_val,
5022 targ, source, subtargets, 0);
5023 }
5024 }
5025
5026 if (generate)
5027 {
5028 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5029 rtx shift = GEN_INT (clear_zero_bit_copies);
5030
5031 emit_insn (gen_lshrsi3 (new_src, source, shift));
5032 emit_insn (gen_ashlsi3 (target, new_src, shift));
5033 }
5034
5035 return insns + 2;
5036 }
5037
5038 break;
5039
5040 default:
5041 break;
5042 }
5043
5044 /* Calculate what the instruction sequences would be if we generated it
5045 normally, negated, or inverted. */
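  /* Illustrative example: for code == PLUS and remainder == 0xfffffffe
     (i.e. adding -2), the positive value needs several 8-bit rotated
     immediates, while its negation (2) is a single immediate, so the
     negated sequence is chosen below and a subtract is emitted instead.  */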
5046 if (code == AND)
5047 /* AND cannot be split into multiple insns, so invert and use BIC. */
5048 insns = 99;
5049 else
5050 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5051
5052 if (can_negate)
5053 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5054 &neg_immediates);
5055 else
5056 neg_insns = 99;
5057
5058 if (can_invert || final_invert)
5059 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5060 &inv_immediates);
5061 else
5062 inv_insns = 99;
5063
5064 immediates = &pos_immediates;
5065
5066 /* Is the negated immediate sequence more efficient? */
5067 if (neg_insns < insns && neg_insns <= inv_insns)
5068 {
5069 insns = neg_insns;
5070 immediates = &neg_immediates;
5071 }
5072 else
5073 can_negate = 0;
5074
5075 /* Is the inverted immediate sequence more efficient?
5076 We must allow for an extra NOT instruction for XOR operations, although
5077 there is some chance that the final 'mvn' will get optimized later. */
5078 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5079 {
5080 insns = inv_insns;
5081 immediates = &inv_immediates;
5082 }
5083 else
5084 {
5085 can_invert = 0;
5086 final_invert = 0;
5087 }
5088
5089 /* Now output the chosen sequence as instructions. */
5090 if (generate)
5091 {
5092 for (i = 0; i < insns; i++)
5093 {
5094 rtx new_src, temp1_rtx;
5095
5096 temp1 = immediates->i[i];
5097
5098 if (code == SET || code == MINUS)
5099 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5100 else if ((final_invert || i < (insns - 1)) && subtargets)
5101 new_src = gen_reg_rtx (mode);
5102 else
5103 new_src = target;
5104
5105 if (can_invert)
5106 temp1 = ~temp1;
5107 else if (can_negate)
5108 temp1 = -temp1;
5109
5110 temp1 = trunc_int_for_mode (temp1, mode);
5111 temp1_rtx = GEN_INT (temp1);
5112
5113 if (code == SET)
5114 ;
5115 else if (code == MINUS)
5116 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5117 else
5118 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5119
5120 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5121 source = new_src;
5122
5123 if (code == SET)
5124 {
5125 can_negate = can_invert;
5126 can_invert = 0;
5127 code = PLUS;
5128 }
5129 else if (code == MINUS)
5130 code = PLUS;
5131 }
5132 }
5133
5134 if (final_invert)
5135 {
5136 if (generate)
5137 emit_constant_insn (cond, gen_rtx_SET (target,
5138 gen_rtx_NOT (mode, source)));
5139 insns++;
5140 }
5141
5142 return insns;
5143 }
5144
5145 /* Canonicalize a comparison so that we are more likely to recognize it.
5146 This can be done for a few constant compares, where we can make the
5147 immediate value easier to load. */
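/* For instance (illustrative): (GT x #0x3fffff) cannot use 0x3fffff (or its
   negation) as an ARM immediate, but 0x400000 is encodable, so the comparison
   is rewritten as (GE x #0x400000), which tests the same condition.  */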
5148
5149 static void
5150 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5151 bool op0_preserve_value)
5152 {
5153 machine_mode mode;
5154 unsigned HOST_WIDE_INT i, maxval;
5155
5156 mode = GET_MODE (*op0);
5157 if (mode == VOIDmode)
5158 mode = GET_MODE (*op1);
5159
5160 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5161
5162 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5163 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5164 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5165 for GTU/LEU in Thumb mode. */
5166 if (mode == DImode)
5167 {
5168
5169 if (*code == GT || *code == LE
5170 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5171 {
5172 /* Missing comparison. First try to use an available
5173 comparison. */
5174 if (CONST_INT_P (*op1))
5175 {
5176 i = INTVAL (*op1);
5177 switch (*code)
5178 {
5179 case GT:
5180 case LE:
5181 if (i != maxval
5182 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5183 {
5184 *op1 = GEN_INT (i + 1);
5185 *code = *code == GT ? GE : LT;
5186 return;
5187 }
5188 break;
5189 case GTU:
5190 case LEU:
5191 if (i != ~((unsigned HOST_WIDE_INT) 0)
5192 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5193 {
5194 *op1 = GEN_INT (i + 1);
5195 *code = *code == GTU ? GEU : LTU;
5196 return;
5197 }
5198 break;
5199 default:
5200 gcc_unreachable ();
5201 }
5202 }
5203
5204 /* If that did not work, reverse the condition. */
5205 if (!op0_preserve_value)
5206 {
5207 std::swap (*op0, *op1);
5208 *code = (int)swap_condition ((enum rtx_code)*code);
5209 }
5210 }
5211 return;
5212 }
5213
5214 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5215 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5216 to facilitate possible combining with a cmp into 'ands'. */
5217 if (mode == SImode
5218 && GET_CODE (*op0) == ZERO_EXTEND
5219 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5220 && GET_MODE (XEXP (*op0, 0)) == QImode
5221 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5222 && subreg_lowpart_p (XEXP (*op0, 0))
5223 && *op1 == const0_rtx)
5224 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5225 GEN_INT (255));
5226
5227 /* Comparisons smaller than DImode. Only adjust comparisons against
5228 an out-of-range constant. */
5229 if (!CONST_INT_P (*op1)
5230 || const_ok_for_arm (INTVAL (*op1))
5231 || const_ok_for_arm (- INTVAL (*op1)))
5232 return;
5233
5234 i = INTVAL (*op1);
5235
5236 switch (*code)
5237 {
5238 case EQ:
5239 case NE:
5240 return;
5241
5242 case GT:
5243 case LE:
5244 if (i != maxval
5245 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5246 {
5247 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5248 *code = *code == GT ? GE : LT;
5249 return;
5250 }
5251 break;
5252
5253 case GE:
5254 case LT:
5255 if (i != ~maxval
5256 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5257 {
5258 *op1 = GEN_INT (i - 1);
5259 *code = *code == GE ? GT : LE;
5260 return;
5261 }
5262 break;
5263
5264 case GTU:
5265 case LEU:
5266 if (i != ~((unsigned HOST_WIDE_INT) 0)
5267 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5268 {
5269 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5270 *code = *code == GTU ? GEU : LTU;
5271 return;
5272 }
5273 break;
5274
5275 case GEU:
5276 case LTU:
5277 if (i != 0
5278 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5279 {
5280 *op1 = GEN_INT (i - 1);
5281 *code = *code == GEU ? GTU : LEU;
5282 return;
5283 }
5284 break;
5285
5286 default:
5287 gcc_unreachable ();
5288 }
5289 }
5290
5291
5292 /* Define how to find the value returned by a function. */
5293
5294 static rtx
5295 arm_function_value(const_tree type, const_tree func,
5296 bool outgoing ATTRIBUTE_UNUSED)
5297 {
5298 machine_mode mode;
5299 int unsignedp ATTRIBUTE_UNUSED;
5300 rtx r ATTRIBUTE_UNUSED;
5301
5302 mode = TYPE_MODE (type);
5303
5304 if (TARGET_AAPCS_BASED)
5305 return aapcs_allocate_return_reg (mode, type, func);
5306
5307 /* Promote integer types. */
5308 if (INTEGRAL_TYPE_P (type))
5309 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5310
5311 /* Promotes small structs returned in a register to full-word size
5312 for big-endian AAPCS. */
5313 if (arm_return_in_msb (type))
5314 {
5315 HOST_WIDE_INT size = int_size_in_bytes (type);
5316 if (size % UNITS_PER_WORD != 0)
5317 {
5318 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5319 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5320 }
5321 }
5322
5323 return arm_libcall_value_1 (mode);
5324 }
5325
5326 /* libcall hashtable helpers. */
5327
5328 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5329 {
5330 static inline hashval_t hash (const rtx_def *);
5331 static inline bool equal (const rtx_def *, const rtx_def *);
5332 static inline void remove (rtx_def *);
5333 };
5334
5335 inline bool
5336 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5337 {
5338 return rtx_equal_p (p1, p2);
5339 }
5340
5341 inline hashval_t
5342 libcall_hasher::hash (const rtx_def *p1)
5343 {
5344 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5345 }
5346
5347 typedef hash_table<libcall_hasher> libcall_table_type;
5348
5349 static void
5350 add_libcall (libcall_table_type *htab, rtx libcall)
5351 {
5352 *htab->find_slot (libcall, INSERT) = libcall;
5353 }
5354
5355 static bool
5356 arm_libcall_uses_aapcs_base (const_rtx libcall)
5357 {
5358 static bool init_done = false;
5359 static libcall_table_type *libcall_htab = NULL;
5360
5361 if (!init_done)
5362 {
5363 init_done = true;
5364
5365 libcall_htab = new libcall_table_type (31);
5366 add_libcall (libcall_htab,
5367 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5368 add_libcall (libcall_htab,
5369 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5370 add_libcall (libcall_htab,
5371 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5372 add_libcall (libcall_htab,
5373 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5374
5375 add_libcall (libcall_htab,
5376 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5377 add_libcall (libcall_htab,
5378 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5379 add_libcall (libcall_htab,
5380 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5381 add_libcall (libcall_htab,
5382 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5383
5384 add_libcall (libcall_htab,
5385 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5386 add_libcall (libcall_htab,
5387 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5388 add_libcall (libcall_htab,
5389 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5390 add_libcall (libcall_htab,
5391 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5392 add_libcall (libcall_htab,
5393 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5394 add_libcall (libcall_htab,
5395 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5396 add_libcall (libcall_htab,
5397 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5398 add_libcall (libcall_htab,
5399 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5400
5401 /* Values from double-precision helper functions are returned in core
5402 registers if the selected core only supports single-precision
5403 arithmetic, even if we are using the hard-float ABI. The same is
5404 true for single-precision helpers, but we will never be using the
5405 hard-float ABI on a CPU which doesn't support single-precision
5406 operations in hardware. */
5407 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5408 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5409 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5410 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5411 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5412 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5413 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5414 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5415 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5416 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5417 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5418 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5419 SFmode));
5420 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5421 DFmode));
5422 add_libcall (libcall_htab,
5423 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5424 }
5425
5426 return libcall && libcall_htab->find (libcall) != NULL;
5427 }
5428
5429 static rtx
5430 arm_libcall_value_1 (machine_mode mode)
5431 {
5432 if (TARGET_AAPCS_BASED)
5433 return aapcs_libcall_value (mode);
5434 else if (TARGET_IWMMXT_ABI
5435 && arm_vector_mode_supported_p (mode))
5436 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5437 else
5438 return gen_rtx_REG (mode, ARG_REGISTER (1));
5439 }
5440
5441 /* Define how to find the value returned by a library function
5442 assuming the value has mode MODE. */
5443
5444 static rtx
5445 arm_libcall_value (machine_mode mode, const_rtx libcall)
5446 {
5447 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5448 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5449 {
5450 /* The following libcalls return their result in integer registers,
5451 even though they return a floating point value. */
5452 if (arm_libcall_uses_aapcs_base (libcall))
5453 return gen_rtx_REG (mode, ARG_REGISTER(1));
5454
5455 }
5456
5457 return arm_libcall_value_1 (mode);
5458 }
5459
5460 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5461
5462 static bool
5463 arm_function_value_regno_p (const unsigned int regno)
5464 {
5465 if (regno == ARG_REGISTER (1)
5466 || (TARGET_32BIT
5467 && TARGET_AAPCS_BASED
5468 && TARGET_HARD_FLOAT
5469 && regno == FIRST_VFP_REGNUM)
5470 || (TARGET_IWMMXT_ABI
5471 && regno == FIRST_IWMMXT_REGNUM))
5472 return true;
5473
5474 return false;
5475 }
5476
5477 /* Determine the amount of memory needed to store the possible return
5478 registers of an untyped call. */
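/* The sizes below are in bytes: 16 covers the core registers r0-r3, the
   extra 32 presumably covers the VFP return registers d0-d3 when a
   hard-float ABI is in use, and the extra 8 covers one 64-bit iWMMXt
   register.  */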
5479 int
5480 arm_apply_result_size (void)
5481 {
5482 int size = 16;
5483
5484 if (TARGET_32BIT)
5485 {
5486 if (TARGET_HARD_FLOAT_ABI)
5487 size += 32;
5488 if (TARGET_IWMMXT_ABI)
5489 size += 8;
5490 }
5491
5492 return size;
5493 }
5494
5495 /* Decide whether TYPE should be returned in memory (true)
5496 or in a register (false). FNTYPE is the type of the function making
5497 the call. */
5498 static bool
5499 arm_return_in_memory (const_tree type, const_tree fntype)
5500 {
5501 HOST_WIDE_INT size;
5502
5503 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5504
5505 if (TARGET_AAPCS_BASED)
5506 {
5507 /* Simple, non-aggregate types (i.e. not including vectors and
5508 complex) are always returned in a register (or registers).
5509 We don't care about which register here, so we can short-cut
5510 some of the detail. */
5511 if (!AGGREGATE_TYPE_P (type)
5512 && TREE_CODE (type) != VECTOR_TYPE
5513 && TREE_CODE (type) != COMPLEX_TYPE)
5514 return false;
5515
5516 /* Any return value that is no larger than one word can be
5517 returned in r0. */
5518 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5519 return false;
5520
5521 /* Check any available co-processors to see if they accept the
5522 type as a register candidate (VFP, for example, can return
5523 some aggregates in consecutive registers). These aren't
5524 available if the call is variadic. */
5525 if (aapcs_select_return_coproc (type, fntype) >= 0)
5526 return false;
5527
5528 /* Vector values should be returned using ARM registers, not
5529 memory (unless they're over 16 bytes, which will break since
5530 we only have four call-clobbered registers to play with). */
5531 if (TREE_CODE (type) == VECTOR_TYPE)
5532 return (size < 0 || size > (4 * UNITS_PER_WORD));
5533
5534 /* The rest go in memory. */
5535 return true;
5536 }
5537
5538 if (TREE_CODE (type) == VECTOR_TYPE)
5539 return (size < 0 || size > (4 * UNITS_PER_WORD));
5540
5541 if (!AGGREGATE_TYPE_P (type) &&
5542 (TREE_CODE (type) != VECTOR_TYPE))
5543 /* All simple types are returned in registers. */
5544 return false;
5545
5546 if (arm_abi != ARM_ABI_APCS)
5547 {
5548 /* ATPCS and later return aggregate types in memory only if they are
5549 larger than a word (or are variable size). */
5550 return (size < 0 || size > UNITS_PER_WORD);
5551 }
5552
5553 /* For the arm-wince targets we choose to be compatible with Microsoft's
5554 ARM and Thumb compilers, which always return aggregates in memory. */
5555 #ifndef ARM_WINCE
5556 /* All structures/unions bigger than one word are returned in memory.
5557 Also catch the case where int_size_in_bytes returns -1. In this case
5558 the aggregate is either huge or of variable size, and in either case
5559 we will want to return it via memory and not in a register. */
5560 if (size < 0 || size > UNITS_PER_WORD)
5561 return true;
5562
5563 if (TREE_CODE (type) == RECORD_TYPE)
5564 {
5565 tree field;
5566
5567 /* For a struct the APCS says that we only return in a register
5568 if the type is 'integer like' and every addressable element
5569 has an offset of zero. For practical purposes this means
5570 that the structure can have at most one non bit-field element
5571 and that this element must be the first one in the structure. */
5572
5573 /* Find the first field, ignoring non FIELD_DECL things which will
5574 have been created by C++. */
5575 for (field = TYPE_FIELDS (type);
5576 field && TREE_CODE (field) != FIELD_DECL;
5577 field = DECL_CHAIN (field))
5578 continue;
5579
5580 if (field == NULL)
5581 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5582
5583 /* Check that the first field is valid for returning in a register. */
5584
5585 /* ... Floats are not allowed */
5586 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5587 return true;
5588
5589 /* ... Aggregates that are not themselves valid for returning in
5590 a register are not allowed. */
5591 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5592 return true;
5593
5594 /* Now check the remaining fields, if any. Only bitfields are allowed,
5595 since they are not addressable. */
5596 for (field = DECL_CHAIN (field);
5597 field;
5598 field = DECL_CHAIN (field))
5599 {
5600 if (TREE_CODE (field) != FIELD_DECL)
5601 continue;
5602
5603 if (!DECL_BIT_FIELD_TYPE (field))
5604 return true;
5605 }
5606
5607 return false;
5608 }
5609
5610 if (TREE_CODE (type) == UNION_TYPE)
5611 {
5612 tree field;
5613
5614 /* Unions can be returned in registers if every element is
5615 integral, or can be returned in an integer register. */
5616 for (field = TYPE_FIELDS (type);
5617 field;
5618 field = DECL_CHAIN (field))
5619 {
5620 if (TREE_CODE (field) != FIELD_DECL)
5621 continue;
5622
5623 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5624 return true;
5625
5626 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5627 return true;
5628 }
5629
5630 return false;
5631 }
5632 #endif /* not ARM_WINCE */
5633
5634 /* Return all other types in memory. */
5635 return true;
5636 }
5637
5638 const struct pcs_attribute_arg
5639 {
5640 const char *arg;
5641 enum arm_pcs value;
5642 } pcs_attribute_args[] =
5643 {
5644 {"aapcs", ARM_PCS_AAPCS},
5645 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5646 #if 0
5647 /* We could recognize these, but changes would be needed elsewhere
5648 * to implement them. */
5649 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5650 {"atpcs", ARM_PCS_ATPCS},
5651 {"apcs", ARM_PCS_APCS},
5652 #endif
5653 {NULL, ARM_PCS_UNKNOWN}
5654 };
5655
5656 static enum arm_pcs
5657 arm_pcs_from_attribute (tree attr)
5658 {
5659 const struct pcs_attribute_arg *ptr;
5660 const char *arg;
5661
5662 /* Get the value of the argument. */
5663 if (TREE_VALUE (attr) == NULL_TREE
5664 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5665 return ARM_PCS_UNKNOWN;
5666
5667 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5668
5669 /* Check it against the list of known arguments. */
5670 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5671 if (streq (arg, ptr->arg))
5672 return ptr->value;
5673
5674 /* An unrecognized PCS name. */
5675 return ARM_PCS_UNKNOWN;
5676 }
5677
5678 /* Get the PCS variant to use for this call. TYPE is the function's type
5679 specification, DECL is the specific declaration. DECL may be null if
5680 the call could be indirect or if this is a library call. */
5681 static enum arm_pcs
5682 arm_get_pcs_model (const_tree type, const_tree decl)
5683 {
5684 bool user_convention = false;
5685 enum arm_pcs user_pcs = arm_pcs_default;
5686 tree attr;
5687
5688 gcc_assert (type);
5689
5690 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5691 if (attr)
5692 {
5693 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5694 user_convention = true;
5695 }
5696
5697 if (TARGET_AAPCS_BASED)
5698 {
5699 /* Detect varargs functions. These always use the base rules
5700 (no argument is ever a candidate for a co-processor
5701 register). */
5702 bool base_rules = stdarg_p (type);
5703
5704 if (user_convention)
5705 {
5706 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5707 sorry ("non-AAPCS derived PCS variant");
5708 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5709 error ("variadic functions must use the base AAPCS variant");
5710 }
5711
5712 if (base_rules)
5713 return ARM_PCS_AAPCS;
5714 else if (user_convention)
5715 return user_pcs;
5716 else if (decl && flag_unit_at_a_time)
5717 {
5718 /* Local functions never leak outside this compilation unit,
5719 so we are free to use whatever conventions are
5720 appropriate. */
5721 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5722 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5723 if (i && i->local)
5724 return ARM_PCS_AAPCS_LOCAL;
5725 }
5726 }
5727 else if (user_convention && user_pcs != arm_pcs_default)
5728 sorry ("PCS variant");
5729
5730 /* For everything else we use the target's default. */
5731 return arm_pcs_default;
5732 }
5733
5734
5735 static void
5736 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5737 const_tree fntype ATTRIBUTE_UNUSED,
5738 rtx libcall ATTRIBUTE_UNUSED,
5739 const_tree fndecl ATTRIBUTE_UNUSED)
5740 {
5741 /* Record the unallocated VFP registers. */
5742 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5743 pcum->aapcs_vfp_reg_alloc = 0;
5744 }
5745
5746 /* Walk down the type tree of TYPE counting consecutive base elements.
5747 If *MODEP is VOIDmode, then set it to the first valid floating point
5748 type. If a non-floating point type is found, or if a floating point
5749 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5750 otherwise return the count in the sub-tree. */
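/* Illustrative example: for

     struct { double x; double y[2]; };

   the REAL_TYPE member contributes 1 and the ARRAY_TYPE member contributes
   2, so this function returns 3 with *MODEP set to DFmode, i.e. a
   homogeneous aggregate of three doubles.  */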
5751 static int
5752 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5753 {
5754 machine_mode mode;
5755 HOST_WIDE_INT size;
5756
5757 switch (TREE_CODE (type))
5758 {
5759 case REAL_TYPE:
5760 mode = TYPE_MODE (type);
5761 if (mode != DFmode && mode != SFmode && mode != HFmode)
5762 return -1;
5763
5764 if (*modep == VOIDmode)
5765 *modep = mode;
5766
5767 if (*modep == mode)
5768 return 1;
5769
5770 break;
5771
5772 case COMPLEX_TYPE:
5773 mode = TYPE_MODE (TREE_TYPE (type));
5774 if (mode != DFmode && mode != SFmode)
5775 return -1;
5776
5777 if (*modep == VOIDmode)
5778 *modep = mode;
5779
5780 if (*modep == mode)
5781 return 2;
5782
5783 break;
5784
5785 case VECTOR_TYPE:
5786 /* Use V2SImode and V4SImode as representatives of all 64-bit
5787 and 128-bit vector types, whether or not those modes are
5788 supported with the present options. */
5789 size = int_size_in_bytes (type);
5790 switch (size)
5791 {
5792 case 8:
5793 mode = V2SImode;
5794 break;
5795 case 16:
5796 mode = V4SImode;
5797 break;
5798 default:
5799 return -1;
5800 }
5801
5802 if (*modep == VOIDmode)
5803 *modep = mode;
5804
5805 /* Vector modes are considered to be opaque: two vectors are
5806 equivalent for the purposes of being homogeneous aggregates
5807 if they are the same size. */
5808 if (*modep == mode)
5809 return 1;
5810
5811 break;
5812
5813 case ARRAY_TYPE:
5814 {
5815 int count;
5816 tree index = TYPE_DOMAIN (type);
5817
5818 /* Can't handle incomplete types nor sizes that are not
5819 fixed. */
5820 if (!COMPLETE_TYPE_P (type)
5821 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5822 return -1;
5823
5824 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5825 if (count == -1
5826 || !index
5827 || !TYPE_MAX_VALUE (index)
5828 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5829 || !TYPE_MIN_VALUE (index)
5830 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5831 || count < 0)
5832 return -1;
5833
5834 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5835 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5836
5837 /* There must be no padding. */
5838 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5839 return -1;
5840
5841 return count;
5842 }
5843
5844 case RECORD_TYPE:
5845 {
5846 int count = 0;
5847 int sub_count;
5848 tree field;
5849
5850 /* Can't handle incomplete types nor sizes that are not
5851 fixed. */
5852 if (!COMPLETE_TYPE_P (type)
5853 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5854 return -1;
5855
5856 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5857 {
5858 if (TREE_CODE (field) != FIELD_DECL)
5859 continue;
5860
5861 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5862 if (sub_count < 0)
5863 return -1;
5864 count += sub_count;
5865 }
5866
5867 /* There must be no padding. */
5868 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5869 return -1;
5870
5871 return count;
5872 }
5873
5874 case UNION_TYPE:
5875 case QUAL_UNION_TYPE:
5876 {
5877 /* These aren't very interesting except in a degenerate case. */
5878 int count = 0;
5879 int sub_count;
5880 tree field;
5881
5882 /* Can't handle incomplete types nor sizes that are not
5883 fixed. */
5884 if (!COMPLETE_TYPE_P (type)
5885 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5886 return -1;
5887
5888 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5889 {
5890 if (TREE_CODE (field) != FIELD_DECL)
5891 continue;
5892
5893 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5894 if (sub_count < 0)
5895 return -1;
5896 count = count > sub_count ? count : sub_count;
5897 }
5898
5899 /* There must be no padding. */
5900 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5901 return -1;
5902
5903 return count;
5904 }
5905
5906 default:
5907 break;
5908 }
5909
5910 return -1;
5911 }
5912
5913 /* Return true if PCS_VARIANT should use VFP registers. */
5914 static bool
5915 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5916 {
5917 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5918 {
5919 static bool seen_thumb1_vfp = false;
5920
5921 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5922 {
5923 sorry ("Thumb-1 hard-float VFP ABI");
5924 /* sorry() is not immediately fatal, so only display this once. */
5925 seen_thumb1_vfp = true;
5926 }
5927
5928 return true;
5929 }
5930
5931 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5932 return false;
5933
5934 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5935 (TARGET_VFP_DOUBLE || !is_double));
5936 }
5937
5938 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5939 suitable for passing or returning in VFP registers for the PCS
5940 variant selected. If it is, then *BASE_MODE is updated to contain
5941 a machine mode describing each element of the argument's type and
5942 *COUNT to hold the number of such elements. */
5943 static bool
5944 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5945 machine_mode mode, const_tree type,
5946 machine_mode *base_mode, int *count)
5947 {
5948 machine_mode new_mode = VOIDmode;
5949
5950 /* If we have the type information, prefer that to working things
5951 out from the mode. */
5952 if (type)
5953 {
5954 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5955
5956 if (ag_count > 0 && ag_count <= 4)
5957 *count = ag_count;
5958 else
5959 return false;
5960 }
5961 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5962 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5963 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5964 {
5965 *count = 1;
5966 new_mode = mode;
5967 }
5968 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5969 {
5970 *count = 2;
5971 new_mode = (mode == DCmode ? DFmode : SFmode);
5972 }
5973 else
5974 return false;
5975
5976
5977 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5978 return false;
5979
5980 *base_mode = new_mode;
5981 return true;
5982 }
5983
5984 static bool
5985 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5986 machine_mode mode, const_tree type)
5987 {
5988 int count ATTRIBUTE_UNUSED;
5989 machine_mode ag_mode ATTRIBUTE_UNUSED;
5990
5991 if (!use_vfp_abi (pcs_variant, false))
5992 return false;
5993 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5994 &ag_mode, &count);
5995 }
5996
5997 static bool
5998 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5999 const_tree type)
6000 {
6001 if (!use_vfp_abi (pcum->pcs_variant, false))
6002 return false;
6003
6004 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6005 &pcum->aapcs_vfp_rmode,
6006 &pcum->aapcs_vfp_rcount);
6007 }
6008
6009 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6010 for the behaviour of this function. */
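/* Sketch of the allocation below (illustrative): for a DFmode candidate,
   rmode_size is 8, so SHIFT is 2 (two S registers per element) and MASK
   covers two consecutive bits of aapcs_vfp_regs_free; the loop then scans
   s0/s1, s2/s3, ... and takes the lowest D-register-sized slot that is
   still free.  */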
6011
6012 static bool
6013 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6014 const_tree type ATTRIBUTE_UNUSED)
6015 {
6016 int rmode_size
6017 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6018 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6019 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6020 int regno;
6021
6022 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6023 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6024 {
6025 pcum->aapcs_vfp_reg_alloc = mask << regno;
6026 if (mode == BLKmode
6027 || (mode == TImode && ! TARGET_NEON)
6028 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6029 {
6030 int i;
6031 int rcount = pcum->aapcs_vfp_rcount;
6032 int rshift = shift;
6033 machine_mode rmode = pcum->aapcs_vfp_rmode;
6034 rtx par;
6035 if (!TARGET_NEON)
6036 {
6037 /* Avoid using unsupported vector modes. */
6038 if (rmode == V2SImode)
6039 rmode = DImode;
6040 else if (rmode == V4SImode)
6041 {
6042 rmode = DImode;
6043 rcount *= 2;
6044 rshift /= 2;
6045 }
6046 }
6047 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6048 for (i = 0; i < rcount; i++)
6049 {
6050 rtx tmp = gen_rtx_REG (rmode,
6051 FIRST_VFP_REGNUM + regno + i * rshift);
6052 tmp = gen_rtx_EXPR_LIST
6053 (VOIDmode, tmp,
6054 GEN_INT (i * GET_MODE_SIZE (rmode)));
6055 XVECEXP (par, 0, i) = tmp;
6056 }
6057
6058 pcum->aapcs_reg = par;
6059 }
6060 else
6061 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6062 return true;
6063 }
6064 return false;
6065 }
6066
6067 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6068 comment there for the behaviour of this function. */
6069
6070 static rtx
6071 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6072 machine_mode mode,
6073 const_tree type ATTRIBUTE_UNUSED)
6074 {
6075 if (!use_vfp_abi (pcs_variant, false))
6076 return NULL;
6077
6078 if (mode == BLKmode
6079 || (GET_MODE_CLASS (mode) == MODE_INT
6080 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6081 && !TARGET_NEON))
6082 {
6083 int count;
6084 machine_mode ag_mode;
6085 int i;
6086 rtx par;
6087 int shift;
6088
6089 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6090 &ag_mode, &count);
6091
6092 if (!TARGET_NEON)
6093 {
6094 if (ag_mode == V2SImode)
6095 ag_mode = DImode;
6096 else if (ag_mode == V4SImode)
6097 {
6098 ag_mode = DImode;
6099 count *= 2;
6100 }
6101 }
6102 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6103 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6104 for (i = 0; i < count; i++)
6105 {
6106 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6107 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6108 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6109 XVECEXP (par, 0, i) = tmp;
6110 }
6111
6112 return par;
6113 }
6114
6115 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6116 }
6117
6118 static void
6119 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6120 machine_mode mode ATTRIBUTE_UNUSED,
6121 const_tree type ATTRIBUTE_UNUSED)
6122 {
6123 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6124 pcum->aapcs_vfp_reg_alloc = 0;
6125 return;
6126 }
6127
6128 #define AAPCS_CP(X) \
6129 { \
6130 aapcs_ ## X ## _cum_init, \
6131 aapcs_ ## X ## _is_call_candidate, \
6132 aapcs_ ## X ## _allocate, \
6133 aapcs_ ## X ## _is_return_candidate, \
6134 aapcs_ ## X ## _allocate_return_reg, \
6135 aapcs_ ## X ## _advance \
6136 }
6137
6138 /* Table of co-processors that can be used to pass arguments in
6139 registers. Ideally no argument should be a candidate for more than
6140 one co-processor table entry, but the table is processed in order
6141 and stops after the first match. If that entry then fails to put
6142 the argument into a co-processor register, the argument will go on
6143 the stack. */
6144 static struct
6145 {
6146 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6147 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6148
6149 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6150 BLKmode) is a candidate for this co-processor's registers; this
6151 function should ignore any position-dependent state in
6152 CUMULATIVE_ARGS and only use call-type dependent information. */
6153 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6154
6155 /* Return true if the argument does get a co-processor register; it
6156 should set aapcs_reg to an RTX of the register allocated as is
6157 required for a return from FUNCTION_ARG. */
6158 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6159
6160 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6161 be returned in this co-processor's registers. */
6162 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6163
6164 /* Allocate and return an RTX element to hold the return value of a call. This
6165 routine must not fail and will only be called if is_return_candidate
6166 returned true with the same parameters. */
6167 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6168
6169 /* Finish processing this argument and prepare to start processing
6170 the next one. */
6171 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6172 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6173 {
6174 AAPCS_CP(vfp)
6175 };
6176
6177 #undef AAPCS_CP
6178
6179 static int
6180 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6181 const_tree type)
6182 {
6183 int i;
6184
6185 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6186 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6187 return i;
6188
6189 return -1;
6190 }
6191
6192 static int
6193 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6194 {
6195 /* We aren't passed a decl, so we can't check that a call is local.
6196 However, it isn't clear that that would be a win anyway, since it
6197 might limit some tail-calling opportunities. */
6198 enum arm_pcs pcs_variant;
6199
6200 if (fntype)
6201 {
6202 const_tree fndecl = NULL_TREE;
6203
6204 if (TREE_CODE (fntype) == FUNCTION_DECL)
6205 {
6206 fndecl = fntype;
6207 fntype = TREE_TYPE (fntype);
6208 }
6209
6210 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6211 }
6212 else
6213 pcs_variant = arm_pcs_default;
6214
6215 if (pcs_variant != ARM_PCS_AAPCS)
6216 {
6217 int i;
6218
6219 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6220 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6221 TYPE_MODE (type),
6222 type))
6223 return i;
6224 }
6225 return -1;
6226 }
6227
6228 static rtx
6229 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6230 const_tree fntype)
6231 {
6232 /* We aren't passed a decl, so we can't check that a call is local.
6233 However, it isn't clear that that would be a win anyway, since it
6234 might limit some tail-calling opportunities. */
6235 enum arm_pcs pcs_variant;
6236 int unsignedp ATTRIBUTE_UNUSED;
6237
6238 if (fntype)
6239 {
6240 const_tree fndecl = NULL_TREE;
6241
6242 if (TREE_CODE (fntype) == FUNCTION_DECL)
6243 {
6244 fndecl = fntype;
6245 fntype = TREE_TYPE (fntype);
6246 }
6247
6248 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6249 }
6250 else
6251 pcs_variant = arm_pcs_default;
6252
6253 /* Promote integer types. */
6254 if (type && INTEGRAL_TYPE_P (type))
6255 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6256
6257 if (pcs_variant != ARM_PCS_AAPCS)
6258 {
6259 int i;
6260
6261 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6262 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6263 type))
6264 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6265 mode, type);
6266 }
6267
6268 /* Promotes small structs returned in a register to full-word size
6269 for big-endian AAPCS. */
6270 if (type && arm_return_in_msb (type))
6271 {
6272 HOST_WIDE_INT size = int_size_in_bytes (type);
6273 if (size % UNITS_PER_WORD != 0)
6274 {
6275 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6276 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6277 }
6278 }
6279
6280 return gen_rtx_REG (mode, R0_REGNUM);
6281 }
6282
6283 static rtx
6284 aapcs_libcall_value (machine_mode mode)
6285 {
6286 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6287 && GET_MODE_SIZE (mode) <= 4)
6288 mode = SImode;
6289
6290 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6291 }
6292
6293 /* Lay out a function argument using the AAPCS rules. The rule
6294 numbers referred to here are those in the AAPCS. */
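/* Worked example (illustrative, base integer variant): for f (int a, double b)
   A is allocated to r0 (C4), then B needs doubleword alignment, so C3 rounds
   the NCRN up from 1 to 2 and C4 places B in r2/r3; a further argument would
   fall through to C6/C7 and go on the stack.  */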
6295 static void
6296 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6297 const_tree type, bool named)
6298 {
6299 int nregs, nregs2;
6300 int ncrn;
6301
6302 /* We only need to do this once per argument. */
6303 if (pcum->aapcs_arg_processed)
6304 return;
6305
6306 pcum->aapcs_arg_processed = true;
6307
6308 /* Special case: if named is false then we are handling an incoming
6309 anonymous argument which is on the stack. */
6310 if (!named)
6311 return;
6312
6313 /* Is this a potential co-processor register candidate? */
6314 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6315 {
6316 int slot = aapcs_select_call_coproc (pcum, mode, type);
6317 pcum->aapcs_cprc_slot = slot;
6318
6319 /* We don't have to apply any of the rules from part B of the
6320 preparation phase, these are handled elsewhere in the
6321 compiler. */
6322
6323 if (slot >= 0)
6324 {
6325 /* A Co-processor register candidate goes either in its own
6326 class of registers or on the stack. */
6327 if (!pcum->aapcs_cprc_failed[slot])
6328 {
6329 /* C1.cp - Try to allocate the argument to co-processor
6330 registers. */
6331 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6332 return;
6333
6334 /* C2.cp - Put the argument on the stack and note that we
6335 can't assign any more candidates in this slot. We also
6336 need to note that we have allocated stack space, so that
6337 we won't later try to split a non-cprc candidate between
6338 core registers and the stack. */
6339 pcum->aapcs_cprc_failed[slot] = true;
6340 pcum->can_split = false;
6341 }
6342
6343 /* We didn't get a register, so this argument goes on the
6344 stack. */
6345 gcc_assert (pcum->can_split == false);
6346 return;
6347 }
6348 }
6349
6350 /* C3 - For double-word aligned arguments, round the NCRN up to the
6351 next even number. */
6352 ncrn = pcum->aapcs_ncrn;
6353 if (ncrn & 1)
6354 {
6355 int res = arm_needs_doubleword_align (mode, type);
6356 /* Only warn during RTL expansion of call stmts, otherwise we would
6357 warn e.g. during gimplification even on functions that will be
6358 always inlined, and we'd warn multiple times. Don't warn when
6359 called in expand_function_start either, as we warn instead in
6360 arm_function_arg_boundary in that case. */
6361 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6362 inform (input_location, "parameter passing for argument of type "
6363 "%qT changed in GCC 7.1", type);
6364 else if (res > 0)
6365 ncrn++;
6366 }
6367
6368 nregs = ARM_NUM_REGS2(mode, type);
6369
6370 /* Sigh, this test should really assert that nregs > 0, but a GCC
6371 extension allows empty structs and then gives them empty size; it
6372 then allows such a structure to be passed by value. For some of
6373 the code below we have to pretend that such an argument has
6374 non-zero size so that we 'locate' it correctly either in
6375 registers or on the stack. */
6376 gcc_assert (nregs >= 0);
6377
6378 nregs2 = nregs ? nregs : 1;
6379
6380 /* C4 - Argument fits entirely in core registers. */
6381 if (ncrn + nregs2 <= NUM_ARG_REGS)
6382 {
6383 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6384 pcum->aapcs_next_ncrn = ncrn + nregs;
6385 return;
6386 }
6387
6388 /* C5 - Some core registers left and there are no arguments already
6389 on the stack: split this argument between the remaining core
6390 registers and the stack. */
6391 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6392 {
6393 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6394 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6395 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6396 return;
6397 }
6398
6399 /* C6 - NCRN is set to 4. */
6400 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6401
6402 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6403 return;
6404 }
6405
6406 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6407 for a call to a function whose data type is FNTYPE.
6408 For a library call, FNTYPE is NULL. */
6409 void
6410 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6411 rtx libname,
6412 tree fndecl ATTRIBUTE_UNUSED)
6413 {
6414 /* Determine the calling convention in use. */
6415 if (fntype)
6416 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6417 else
6418 pcum->pcs_variant = arm_pcs_default;
6419
6420 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6421 {
6422 if (arm_libcall_uses_aapcs_base (libname))
6423 pcum->pcs_variant = ARM_PCS_AAPCS;
6424
6425 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6426 pcum->aapcs_reg = NULL_RTX;
6427 pcum->aapcs_partial = 0;
6428 pcum->aapcs_arg_processed = false;
6429 pcum->aapcs_cprc_slot = -1;
6430 pcum->can_split = true;
6431
6432 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6433 {
6434 int i;
6435
6436 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6437 {
6438 pcum->aapcs_cprc_failed[i] = false;
6439 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6440 }
6441 }
6442 return;
6443 }
6444
6445 /* Legacy ABIs */
6446
6447 /* On the ARM, the offset starts at 0. */
6448 pcum->nregs = 0;
6449 pcum->iwmmxt_nregs = 0;
6450 pcum->can_split = true;
6451
6452 /* Varargs vectors are treated the same as long long.
6453 named_count avoids having to change the way ARM handles 'named'. */
6454 pcum->named_count = 0;
6455 pcum->nargs = 0;
6456
6457 if (TARGET_REALLY_IWMMXT && fntype)
6458 {
6459 tree fn_arg;
6460
6461 for (fn_arg = TYPE_ARG_TYPES (fntype);
6462 fn_arg;
6463 fn_arg = TREE_CHAIN (fn_arg))
6464 pcum->named_count += 1;
6465
6466 if (! pcum->named_count)
6467 pcum->named_count = INT_MAX;
6468 }
6469 }
6470
6471 /* Return 1 if double word alignment is required for argument passing.
6472 Return -1 if double word alignment used to be required for argument
6473 passing before PR77728 ABI fix, but is not required anymore.
6474 Return 0 if double word alignment is not required and wasn't required
6475 before either. */
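/* Illustrative examples: a double or long long argument (64-bit alignment)
   yields 1; a struct whose only FIELD_DECLs are ints but which contains an
   over-aligned non-field entity (e.g. a C++ static data member) yields -1,
   so that -Wpsabi can note the PR77728 ABI change; a plain int yields 0.  */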
6476 static int
6477 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6478 {
6479 if (!type)
6480 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6481
6482 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6483 if (!AGGREGATE_TYPE_P (type))
6484 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6485
6486 /* Array types: Use member alignment of element type. */
6487 if (TREE_CODE (type) == ARRAY_TYPE)
6488 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6489
6490 int ret = 0;
6491 /* Record/aggregate types: Use greatest member alignment of any member. */
6492 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6493 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6494 {
6495 if (TREE_CODE (field) == FIELD_DECL)
6496 return 1;
6497 else
6498 /* Before PR77728 fix, we were incorrectly considering also
6499 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6500 Make sure we can warn about that with -Wpsabi. */
6501 ret = -1;
6502 }
6503
6504 return ret;
6505 }
6506
6507
6508 /* Determine where to put an argument to a function.
6509 Value is zero to push the argument on the stack,
6510 or a hard register in which to store the argument.
6511
6512 MODE is the argument's machine mode.
6513 TYPE is the data type of the argument (as a tree).
6514 This is null for libcalls where that information may
6515 not be available.
6516 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6517 the preceding args and about the function being called.
6518 NAMED is nonzero if this argument is a named parameter
6519 (otherwise it is an extra parameter matching an ellipsis).
6520
6521 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6522 other arguments are passed on the stack. If (NAMED == 0) (which happens
6523 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6524 defined), say it is passed in the stack (function_prologue will
6525 indeed make it pass in the stack if necessary). */
6526
6527 static rtx
6528 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6529 const_tree type, bool named)
6530 {
6531 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6532 int nregs;
6533
6534 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6535 a call insn (op3 of a call_value insn). */
6536 if (mode == VOIDmode)
6537 return const0_rtx;
6538
6539 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6540 {
6541 aapcs_layout_arg (pcum, mode, type, named);
6542 return pcum->aapcs_reg;
6543 }
6544
6545 /* Varargs vectors are treated the same as long long.
6546 named_count avoids having to change the way ARM handles 'named'. */
6547 if (TARGET_IWMMXT_ABI
6548 && arm_vector_mode_supported_p (mode)
6549 && pcum->named_count > pcum->nargs + 1)
6550 {
6551 if (pcum->iwmmxt_nregs <= 9)
6552 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6553 else
6554 {
6555 pcum->can_split = false;
6556 return NULL_RTX;
6557 }
6558 }
6559
6560 /* Put doubleword aligned quantities in even register pairs. */
6561 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6562 {
6563 int res = arm_needs_doubleword_align (mode, type);
6564 if (res < 0 && warn_psabi)
6565 inform (input_location, "parameter passing for argument of type "
6566 "%qT changed in GCC 7.1", type);
6567 else if (res > 0)
6568 pcum->nregs++;
6569 }
6570
6571 /* Only allow splitting an arg between regs and memory if all preceding
6572 args were allocated to regs. For args passed by reference we only count
6573 the reference pointer. */
6574 if (pcum->can_split)
6575 nregs = 1;
6576 else
6577 nregs = ARM_NUM_REGS2 (mode, type);
6578
6579 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6580 return NULL_RTX;
6581
6582 return gen_rtx_REG (mode, pcum->nregs);
6583 }
6584
6585 static unsigned int
6586 arm_function_arg_boundary (machine_mode mode, const_tree type)
6587 {
6588 if (!ARM_DOUBLEWORD_ALIGN)
6589 return PARM_BOUNDARY;
6590
6591 int res = arm_needs_doubleword_align (mode, type);
6592 if (res < 0 && warn_psabi)
6593 inform (input_location, "parameter passing for argument of type %qT "
6594 "changed in GCC 7.1", type);
6595
6596 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6597 }
6598
6599 static int
6600 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6601 tree type, bool named)
6602 {
6603 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6604 int nregs = pcum->nregs;
6605
6606 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6607 {
6608 aapcs_layout_arg (pcum, mode, type, named);
6609 return pcum->aapcs_partial;
6610 }
6611
6612 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6613 return 0;
6614
6615 if (NUM_ARG_REGS > nregs
6616 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6617 && pcum->can_split)
6618 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6619
6620 return 0;
6621 }
6622
6623 /* Update the data in PCUM to advance over an argument
6624 of mode MODE and data type TYPE.
6625 (TYPE is null for libcalls where that information may not be available.) */
6626
6627 static void
6628 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6629 const_tree type, bool named)
6630 {
6631 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6632
6633 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6634 {
6635 aapcs_layout_arg (pcum, mode, type, named);
6636
6637 if (pcum->aapcs_cprc_slot >= 0)
6638 {
6639 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6640 type);
6641 pcum->aapcs_cprc_slot = -1;
6642 }
6643
6644 /* Generic stuff. */
6645 pcum->aapcs_arg_processed = false;
6646 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6647 pcum->aapcs_reg = NULL_RTX;
6648 pcum->aapcs_partial = 0;
6649 }
6650 else
6651 {
6652 pcum->nargs += 1;
6653 if (arm_vector_mode_supported_p (mode)
6654 && pcum->named_count > pcum->nargs
6655 && TARGET_IWMMXT_ABI)
6656 pcum->iwmmxt_nregs += 1;
6657 else
6658 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6659 }
6660 }
6661
6662 /* Variable sized types are passed by reference. This is a GCC
6663 extension to the ARM ABI. */
6664
6665 static bool
6666 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6667 machine_mode mode ATTRIBUTE_UNUSED,
6668 const_tree type, bool named ATTRIBUTE_UNUSED)
6669 {
6670 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6671 }
6672 \f
6673 /* Encode the current state of the #pragma [no_]long_calls. */
6674 typedef enum
6675 {
6676 OFF, /* No #pragma [no_]long_calls is in effect. */
6677 LONG, /* #pragma long_calls is in effect. */
6678 SHORT /* #pragma no_long_calls is in effect. */
6679 } arm_pragma_enum;
6680
6681 static arm_pragma_enum arm_pragma_long_calls = OFF;
6682
6683 void
6684 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6685 {
6686 arm_pragma_long_calls = LONG;
6687 }
6688
6689 void
6690 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6691 {
6692 arm_pragma_long_calls = SHORT;
6693 }
6694
6695 void
6696 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6697 {
6698 arm_pragma_long_calls = OFF;
6699 }
6700 \f
6701 /* Handle an attribute requiring a FUNCTION_DECL;
6702 arguments as in struct attribute_spec.handler. */
6703 static tree
6704 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6705 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6706 {
6707 if (TREE_CODE (*node) != FUNCTION_DECL)
6708 {
6709 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6710 name);
6711 *no_add_attrs = true;
6712 }
6713
6714 return NULL_TREE;
6715 }
6716
6717 /* Handle an "interrupt" or "isr" attribute;
6718 arguments as in struct attribute_spec.handler. */
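/* Typical usage (illustrative):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional string argument selects the interrupt kind and is validated
   by arm_isr_value.  */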
6719 static tree
6720 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6721 bool *no_add_attrs)
6722 {
6723 if (DECL_P (*node))
6724 {
6725 if (TREE_CODE (*node) != FUNCTION_DECL)
6726 {
6727 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6728 name);
6729 *no_add_attrs = true;
6730 }
6731 /* FIXME: the argument if any is checked for type attributes;
6732 should it be checked for decl ones? */
6733 }
6734 else
6735 {
6736 if (TREE_CODE (*node) == FUNCTION_TYPE
6737 || TREE_CODE (*node) == METHOD_TYPE)
6738 {
6739 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6740 {
6741 warning (OPT_Wattributes, "%qE attribute ignored",
6742 name);
6743 *no_add_attrs = true;
6744 }
6745 }
6746 else if (TREE_CODE (*node) == POINTER_TYPE
6747 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6748 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6749 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6750 {
6751 *node = build_variant_type_copy (*node);
6752 TREE_TYPE (*node) = build_type_attribute_variant
6753 (TREE_TYPE (*node),
6754 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6755 *no_add_attrs = true;
6756 }
6757 else
6758 {
6759 /* Possibly pass this attribute on from the type to a decl. */
6760 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6761 | (int) ATTR_FLAG_FUNCTION_NEXT
6762 | (int) ATTR_FLAG_ARRAY_NEXT))
6763 {
6764 *no_add_attrs = true;
6765 return tree_cons (name, args, NULL_TREE);
6766 }
6767 else
6768 {
6769 warning (OPT_Wattributes, "%qE attribute ignored",
6770 name);
6771 }
6772 }
6773 }
6774
6775 return NULL_TREE;
6776 }
6777
6778 /* Handle a "pcs" attribute; arguments as in struct
6779 attribute_spec.handler. */
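/* Typical usage (illustrative):

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   Only the strings listed in pcs_attribute_args above are accepted.  */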
6780 static tree
6781 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6782 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6783 {
6784 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6785 {
6786 warning (OPT_Wattributes, "%qE attribute ignored", name);
6787 *no_add_attrs = true;
6788 }
6789 return NULL_TREE;
6790 }
6791
6792 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6793 /* Handle the "notshared" attribute. This attribute is another way of
6794 requesting hidden visibility. ARM's compiler supports
6795 "__declspec(notshared)"; we support the same thing via an
6796 attribute. */
6797
6798 static tree
6799 arm_handle_notshared_attribute (tree *node,
6800 tree name ATTRIBUTE_UNUSED,
6801 tree args ATTRIBUTE_UNUSED,
6802 int flags ATTRIBUTE_UNUSED,
6803 bool *no_add_attrs)
6804 {
6805 tree decl = TYPE_NAME (*node);
6806
6807 if (decl)
6808 {
6809 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6810 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6811 *no_add_attrs = false;
6812 }
6813 return NULL_TREE;
6814 }
6815 #endif
6816
6817 /* This function returns true if a function with declaration FNDECL and type
6818 FNTYPE uses the stack to pass arguments or to return its result, and false
6819 otherwise. This is used for functions with the attributes
6820 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6821 diagnostic messages if the stack is used. NAME is the name of the attribute
6822 used. */
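/* For example (illustrative): a cmse_nonsecure_entry function taking five
   int arguments is rejected because the fifth argument would be passed on
   the stack, as are variadic functions and functions returning a large
   aggregate in memory.  */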
6823
6824 static bool
6825 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6826 {
6827 function_args_iterator args_iter;
6828 CUMULATIVE_ARGS args_so_far_v;
6829 cumulative_args_t args_so_far;
6830 bool first_param = true;
6831 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6832
6833 /* Error out if any argument is passed on the stack. */
6834 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6835 args_so_far = pack_cumulative_args (&args_so_far_v);
6836 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6837 {
6838 rtx arg_rtx;
6839 machine_mode arg_mode = TYPE_MODE (arg_type);
6840
6841 prev_arg_type = arg_type;
6842 if (VOID_TYPE_P (arg_type))
6843 continue;
6844
6845 if (!first_param)
6846 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6847 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6848 if (!arg_rtx
6849 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6850 {
6851 error ("%qE attribute not available to functions with arguments "
6852 "passed on the stack", name);
6853 return true;
6854 }
6855 first_param = false;
6856 }
6857
6858 /* Error out for variadic functions since we cannot control how many
6859 arguments will be passed and thus the stack could be used. stdarg_p () is not
6860 used for this check to avoid walking the argument list twice. */
6861 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6862 {
6863 error ("%qE attribute not available to functions with variable number "
6864 "of arguments", name);
6865 return true;
6866 }
6867
6868 /* Error out if return value is passed on the stack. */
6869 ret_type = TREE_TYPE (fntype);
6870 if (arm_return_in_memory (ret_type, fntype))
6871 {
6872 error ("%qE attribute not available to functions that return value on "
6873 "the stack", name);
6874 return true;
6875 }
6876 return false;
6877 }
6878
6879 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6880 function will check whether the attribute is allowed here and will add the
6881 attribute to the function declaration tree or otherwise issue a warning. */
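/* Typical usage (illustrative), compiled with -mcmse:

     int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int x);  */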
6882
6883 static tree
6884 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6885 tree /* args */,
6886 int /* flags */,
6887 bool *no_add_attrs)
6888 {
6889 tree fndecl;
6890
6891 if (!use_cmse)
6892 {
6893 *no_add_attrs = true;
6894 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6895 name);
6896 return NULL_TREE;
6897 }
6898
6899 /* Ignore attribute for function types. */
6900 if (TREE_CODE (*node) != FUNCTION_DECL)
6901 {
6902 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6903 name);
6904 *no_add_attrs = true;
6905 return NULL_TREE;
6906 }
6907
6908 fndecl = *node;
6909
6910 /* Warn for static linkage functions. */
6911 if (!TREE_PUBLIC (fndecl))
6912 {
6913 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6914 "with static linkage", name);
6915 *no_add_attrs = true;
6916 return NULL_TREE;
6917 }
6918
6919 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6920 TREE_TYPE (fndecl));
6921 return NULL_TREE;
6922 }
6923
6924
6925 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6926 function will check whether the attribute is allowed here and will add the
6927 attribute to the function type tree or otherwise issue a diagnostic. The
6928 reason we check this at declaration time is to only allow the use of the
6929 attribute with declarations of function pointers and not function
6930 declarations. This function checks NODE is of the expected type and issues
6931 diagnostics otherwise using NAME. If it is not of the expected type
6932 *NO_ADD_ATTRS will be set to true. */
6933
6934 static tree
6935 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6936 tree /* args */,
6937 int /* flags */,
6938 bool *no_add_attrs)
6939 {
6940 tree decl = NULL_TREE, fntype = NULL_TREE;
6941 tree type;
6942
6943 if (!use_cmse)
6944 {
6945 *no_add_attrs = true;
6946 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6947 name);
6948 return NULL_TREE;
6949 }
6950
6951 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6952 {
6953 decl = *node;
6954 fntype = TREE_TYPE (decl);
6955 }
6956
6957 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6958 fntype = TREE_TYPE (fntype);
6959
6960 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6961 {
6962 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6963 "function pointer", name);
6964 *no_add_attrs = true;
6965 return NULL_TREE;
6966 }
6967
6968 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6969
6970 if (*no_add_attrs)
6971 return NULL_TREE;
6972
6973 /* Prevent trees being shared among function types with and without
6974 cmse_nonsecure_call attribute. */
6975 type = TREE_TYPE (decl);
6976
6977 type = build_distinct_type_copy (type);
6978 TREE_TYPE (decl) = type;
6979 fntype = type;
6980
6981 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6982 {
6983 type = fntype;
6984 fntype = TREE_TYPE (fntype);
6985 fntype = build_distinct_type_copy (fntype);
6986 TREE_TYPE (type) = fntype;
6987 }
6988
6989 /* Construct a type attribute and add it to the function type. */
6990 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6991 TYPE_ATTRIBUTES (fntype));
6992 TYPE_ATTRIBUTES (fntype) = attrs;
6993 return NULL_TREE;
6994 }
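/* Illustrative use of the attribute handled above (hypothetical names):
   it must be attached to the base type of a function pointer, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_func_t (void);
     ns_func_t *callback;
     ...
     callback ();

   so that the call through CALLBACK is expanded as a non-secure call
   sequence.  Applying the attribute to a plain function declaration is
   diagnosed above instead.  */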
6995
6996 /* Return 0 if the attributes for two types are incompatible, 1 if they
6997 are compatible, and 2 if they are nearly compatible (which causes a
6998 warning to be generated). */
6999 static int
7000 arm_comp_type_attributes (const_tree type1, const_tree type2)
7001 {
7002 int l1, l2, s1, s2;
7003
7004 /* Calling-convention attributes only apply to function types. */
7005 if (TREE_CODE (type1) != FUNCTION_TYPE)
7006 return 1;
7007
7008 /* Check for mismatched call attributes. */
7009 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7010 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7011 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7012 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7013
7014 /* Only bother to check if an attribute is defined. */
7015 if (l1 | l2 | s1 | s2)
7016 {
7017 /* If one type has an attribute, the other must have the same attribute. */
7018 if ((l1 != l2) || (s1 != s2))
7019 return 0;
7020
7021 /* Disallow mixed attributes. */
7022 if ((l1 & s2) || (l2 & s1))
7023 return 0;
7024 }
7025
7026 /* Check for mismatched ISR attribute. */
7027 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7028 if (! l1)
7029 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7030 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7031 if (! l2)
7032 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7033 if (l1 != l2)
7034 return 0;
7035
7036 l1 = lookup_attribute ("cmse_nonsecure_call",
7037 TYPE_ATTRIBUTES (type1)) != NULL;
7038 l2 = lookup_attribute ("cmse_nonsecure_call",
7039 TYPE_ATTRIBUTES (type2)) != NULL;
7040
7041 if (l1 != l2)
7042 return 0;
7043
7044 return 1;
7045 }
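/* Illustrative consequence of the check above (hypothetical names):

     void f (void);
     void g (void) __attribute__ ((long_call));

   have incompatible function types, since only one of them carries the
   long_call attribute, so e.g. assigning &g to a pointer of f's type is
   typically diagnosed.  */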
7046
7047 /* Assigns default attributes to newly defined type. This is used to
7048 set short_call/long_call attributes for function types of
7049 functions defined inside corresponding #pragma scopes. */
7050 static void
7051 arm_set_default_type_attributes (tree type)
7052 {
7053 /* Add __attribute__ ((long_call)) to all functions when inside
7054 #pragma long_calls, or __attribute__ ((short_call)) when inside
7055 #pragma no_long_calls. */
7056 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7057 {
7058 tree type_attr_list, attr_name;
7059 type_attr_list = TYPE_ATTRIBUTES (type);
7060
7061 if (arm_pragma_long_calls == LONG)
7062 attr_name = get_identifier ("long_call");
7063 else if (arm_pragma_long_calls == SHORT)
7064 attr_name = get_identifier ("short_call");
7065 else
7066 return;
7067
7068 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7069 TYPE_ATTRIBUTES (type) = type_attr_list;
7070 }
7071 }
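/* Illustrative use of the pragma scopes mentioned above (hypothetical
   function names):

     #pragma long_calls
     void far_func (void);      <-- type gets __attribute__ ((long_call))
     #pragma no_long_calls
     void near_func (void);     <-- type gets __attribute__ ((short_call))
     #pragma long_calls_off
     void plain_func (void);    <-- no attribute added  */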
7072 \f
7073 /* Return true if DECL is known to be linked into section SECTION. */
7074
7075 static bool
7076 arm_function_in_section_p (tree decl, section *section)
7077 {
7078 /* We can only be certain about the prevailing symbol definition. */
7079 if (!decl_binds_to_current_def_p (decl))
7080 return false;
7081
7082 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7083 if (!DECL_SECTION_NAME (decl))
7084 {
7085 /* Make sure that we will not create a unique section for DECL. */
7086 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7087 return false;
7088 }
7089
7090 return function_section (decl) == section;
7091 }
7092
7093 /* Return nonzero if a 32-bit "long_call" should be generated for
7094 a call from the current function to DECL. We generate a long_call
7095 if the function:
7096
7097 a. has an __attribute__ ((long_call))
7098 or b. is within the scope of a #pragma long_calls
7099 or c. the -mlong-calls command line switch has been specified
7100
7101 However we do not generate a long call if the function:
7102
7103 d. has an __attribute__ ((short_call))
7104 or e. is inside the scope of a #pragma no_long_calls
7105 or f. is defined in the same section as the current function. */
7106
7107 bool
7108 arm_is_long_call_p (tree decl)
7109 {
7110 tree attrs;
7111
7112 if (!decl)
7113 return TARGET_LONG_CALLS;
7114
7115 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7116 if (lookup_attribute ("short_call", attrs))
7117 return false;
7118
7119 /* For "f", be conservative, and only cater for cases in which the
7120 whole of the current function is placed in the same section. */
7121 if (!flag_reorder_blocks_and_partition
7122 && TREE_CODE (decl) == FUNCTION_DECL
7123 && arm_function_in_section_p (decl, current_function_section ()))
7124 return false;
7125
7126 if (lookup_attribute ("long_call", attrs))
7127 return true;
7128
7129 return TARGET_LONG_CALLS;
7130 }
7131
7132 /* Return nonzero if it is ok to make a tail-call to DECL. */
7133 static bool
7134 arm_function_ok_for_sibcall (tree decl, tree exp)
7135 {
7136 unsigned long func_type;
7137
7138 if (cfun->machine->sibcall_blocked)
7139 return false;
7140
7141 /* Never tailcall something if we are generating code for Thumb-1. */
7142 if (TARGET_THUMB1)
7143 return false;
7144
7145 /* The PIC register is live on entry to VxWorks PLT entries, so we
7146 must make the call before restoring the PIC register. */
7147 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7148 return false;
7149
7150 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7151 may be used both as the target of the call and as the base register for
7152 restoring the VFP registers. */
7153 if (TARGET_APCS_FRAME && TARGET_ARM
7154 && TARGET_HARD_FLOAT
7155 && decl && arm_is_long_call_p (decl))
7156 return false;
7157
7158 /* If we are interworking and the function is not declared static
7159 then we can't tail-call it unless we know that it exists in this
7160 compilation unit (since it might be a Thumb routine). */
7161 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7162 && !TREE_ASM_WRITTEN (decl))
7163 return false;
7164
7165 func_type = arm_current_func_type ();
7166 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7167 if (IS_INTERRUPT (func_type))
7168 return false;
7169
7170 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7171 generated for entry functions themselves. */
7172 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7173 return false;
7174
7175 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls;
7176 this would complicate matters for later code generation. */
7177 if (TREE_CODE (exp) == CALL_EXPR)
7178 {
7179 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7180 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7181 return false;
7182 }
7183
7184 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7185 {
7186 /* Check that the return value locations are the same. For
7187 example that we aren't returning a value from the sibling in
7188 a VFP register but then need to transfer it to a core
7189 register. */
7190 rtx a, b;
7191 tree decl_or_type = decl;
7192
7193 /* If it is an indirect function pointer, get the function type. */
7194 if (!decl)
7195 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7196
7197 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7198 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7199 cfun->decl, false);
7200 if (!rtx_equal_p (a, b))
7201 return false;
7202 }
7203
7204 /* Never tailcall if function may be called with a misaligned SP. */
7205 if (IS_STACKALIGN (func_type))
7206 return false;
7207
7208 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7209 references should become a NOP. Don't convert such calls into
7210 sibling calls. */
7211 if (TARGET_AAPCS_BASED
7212 && arm_abi == ARM_ABI_AAPCS
7213 && decl
7214 && DECL_WEAK (decl))
7215 return false;
7216
7217 /* We cannot do a tailcall for an indirect call by descriptor if all the
7218 argument registers are used because the only register left to load the
7219 address is IP and it will already contain the static chain. */
7220 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7221 {
7222 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7223 CUMULATIVE_ARGS cum;
7224 cumulative_args_t cum_v;
7225
7226 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7227 cum_v = pack_cumulative_args (&cum);
7228
7229 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7230 {
7231 tree type = TREE_VALUE (t);
7232 if (!VOID_TYPE_P (type))
7233 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7234 }
7235
7236 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7237 return false;
7238 }
7239
7240 /* Everything else is ok. */
7241 return true;
7242 }
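/* Two illustrative cases caught by the checks above (hypothetical names):
   on an AAPCS-based bare-metal target

     int __attribute__ ((weak)) maybe_absent (int);
     int wrap (int x) { return maybe_absent (x); }

   is not turned into a sibling call because MAYBE_ABSENT is weak, and a
   call made through a cmse_nonsecure_call function pointer is likewise
   never tail-called.  */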
7243
7244 \f
7245 /* Addressing mode support functions. */
7246
7247 /* Return nonzero if X is a legitimate immediate operand when compiling
7248 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7249 int
7250 legitimate_pic_operand_p (rtx x)
7251 {
7252 if (GET_CODE (x) == SYMBOL_REF
7253 || (GET_CODE (x) == CONST
7254 && GET_CODE (XEXP (x, 0)) == PLUS
7255 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7256 return 0;
7257
7258 return 1;
7259 }
7260
7261 /* Record that the current function needs a PIC register. Initialize
7262 cfun->machine->pic_reg if we have not already done so. */
7263
7264 static void
7265 require_pic_register (void)
7266 {
7267 /* A lot of the logic here is made obscure by the fact that this
7268 routine gets called as part of the rtx cost estimation process.
7269 We don't want those calls to affect any assumptions about the real
7270 function; and further, we can't call entry_of_function() until we
7271 start the real expansion process. */
7272 if (!crtl->uses_pic_offset_table)
7273 {
7274 gcc_assert (can_create_pseudo_p ());
7275 if (arm_pic_register != INVALID_REGNUM
7276 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7277 {
7278 if (!cfun->machine->pic_reg)
7279 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7280
7281 /* Play games to avoid marking the function as needing pic
7282 if we are being called as part of the cost-estimation
7283 process. */
7284 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7285 crtl->uses_pic_offset_table = 1;
7286 }
7287 else
7288 {
7289 rtx_insn *seq, *insn;
7290
7291 if (!cfun->machine->pic_reg)
7292 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7293
7294 /* Play games to avoid marking the function as needing pic
7295 if we are being called as part of the cost-estimation
7296 process. */
7297 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7298 {
7299 crtl->uses_pic_offset_table = 1;
7300 start_sequence ();
7301
7302 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7303 && arm_pic_register > LAST_LO_REGNUM)
7304 emit_move_insn (cfun->machine->pic_reg,
7305 gen_rtx_REG (Pmode, arm_pic_register));
7306 else
7307 arm_load_pic_register (0UL);
7308
7309 seq = get_insns ();
7310 end_sequence ();
7311
7312 for (insn = seq; insn; insn = NEXT_INSN (insn))
7313 if (INSN_P (insn))
7314 INSN_LOCATION (insn) = prologue_location;
7315
7316 /* We can be called during expansion of PHI nodes, where
7317 we can't yet emit instructions directly in the final
7318 insn stream. Queue the insns on the entry edge, they will
7319 be committed after everything else is expanded. */
7320 insert_insn_on_edge (seq,
7321 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7322 }
7323 }
7324 }
7325 }
7326
7327 rtx
7328 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7329 {
7330 if (GET_CODE (orig) == SYMBOL_REF
7331 || GET_CODE (orig) == LABEL_REF)
7332 {
7333 if (reg == 0)
7334 {
7335 gcc_assert (can_create_pseudo_p ());
7336 reg = gen_reg_rtx (Pmode);
7337 }
7338
7339 /* VxWorks does not impose a fixed gap between segments; the run-time
7340 gap can be different from the object-file gap. We therefore can't
7341 use GOTOFF unless we are absolutely sure that the symbol is in the
7342 same segment as the GOT. Unfortunately, the flexibility of linker
7343 scripts means that we can't be sure of that in general, so assume
7344 that GOTOFF is never valid on VxWorks. */
7345 /* References to weak symbols cannot be resolved locally: they
7346 may be overridden by a non-weak definition at link time. */
7347 rtx_insn *insn;
7348 if ((GET_CODE (orig) == LABEL_REF
7349 || (GET_CODE (orig) == SYMBOL_REF
7350 && SYMBOL_REF_LOCAL_P (orig)
7351 && (SYMBOL_REF_DECL (orig)
7352 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7353 && NEED_GOT_RELOC
7354 && arm_pic_data_is_text_relative)
7355 insn = arm_pic_static_addr (orig, reg);
7356 else
7357 {
7358 rtx pat;
7359 rtx mem;
7360
7361 /* If this function doesn't have a pic register, create one now. */
7362 require_pic_register ();
7363
7364 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7365
7366 /* Make the MEM as close to a constant as possible. */
7367 mem = SET_SRC (pat);
7368 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7369 MEM_READONLY_P (mem) = 1;
7370 MEM_NOTRAP_P (mem) = 1;
7371
7372 insn = emit_insn (pat);
7373 }
7374
7375 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7376 by loop. */
7377 set_unique_reg_note (insn, REG_EQUAL, orig);
7378
7379 return reg;
7380 }
7381 else if (GET_CODE (orig) == CONST)
7382 {
7383 rtx base, offset;
7384
7385 if (GET_CODE (XEXP (orig, 0)) == PLUS
7386 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7387 return orig;
7388
7389 /* Handle the case where we have: const (UNSPEC_TLS). */
7390 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7391 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7392 return orig;
7393
7394 /* Handle the case where we have:
7395 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7396 CONST_INT. */
7397 if (GET_CODE (XEXP (orig, 0)) == PLUS
7398 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7399 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7400 {
7401 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7402 return orig;
7403 }
7404
7405 if (reg == 0)
7406 {
7407 gcc_assert (can_create_pseudo_p ());
7408 reg = gen_reg_rtx (Pmode);
7409 }
7410
7411 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7412
7413 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7414 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7415 base == reg ? 0 : reg);
7416
7417 if (CONST_INT_P (offset))
7418 {
7419 /* The base register doesn't really matter, we only want to
7420 test the index for the appropriate mode. */
7421 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7422 {
7423 gcc_assert (can_create_pseudo_p ());
7424 offset = force_reg (Pmode, offset);
7425 }
7426
7427 if (CONST_INT_P (offset))
7428 return plus_constant (Pmode, base, INTVAL (offset));
7429 }
7430
7431 if (GET_MODE_SIZE (mode) > 4
7432 && (GET_MODE_CLASS (mode) == MODE_INT
7433 || TARGET_SOFT_FLOAT))
7434 {
7435 emit_insn (gen_addsi3 (reg, base, offset));
7436 return reg;
7437 }
7438
7439 return gen_rtx_PLUS (Pmode, base, offset);
7440 }
7441
7442 return orig;
7443 }
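/* Sketch of the two shapes produced by legitimize_pic_address for a
   SYMBOL_REF (illustrative only; the patterns live in arm.md):

   - for local, text-relative symbols, arm_pic_static_addr emits a
     PC-relative computation of the address, avoiding a GOT load;

   - otherwise the address is loaded from the GOT slot, roughly

       (set reg (mem (plus pic_reg (unspec [sym] UNSPEC_PIC_SYM))))

     where pic_reg is cfun->machine->pic_reg and calculate_pic_address
     may later split the UNSPEC term into its own register.  */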
7444
7445
7446 /* Find a spare register to use during the prolog of a function. */
7447
7448 static int
7449 thumb_find_work_register (unsigned long pushed_regs_mask)
7450 {
7451 int reg;
7452
7453 /* Check the argument registers first as these are call-used. The
7454 register allocation order means that sometimes r3 might be used
7455 but earlier argument registers might not, so check them all. */
7456 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7457 if (!df_regs_ever_live_p (reg))
7458 return reg;
7459
7460 /* Before going on to check the call-saved registers we can try a couple
7461 more ways of deducing that r3 is available. The first is when we are
7462 pushing anonymous arguments onto the stack and we have less than 4
7463 registers' worth of fixed arguments (*). In this case r3 will be part of
7464 the variable argument list and so we can be sure that it will be
7465 pushed right at the start of the function. Hence it will be available
7466 for the rest of the prologue.
7467 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7468 if (cfun->machine->uses_anonymous_args
7469 && crtl->args.pretend_args_size > 0)
7470 return LAST_ARG_REGNUM;
7471
7472 /* The other case is when we have fixed arguments but less than 4 registers
7473 worth. In this case r3 might be used in the body of the function, but
7474 it is not being used to convey an argument into the function. In theory
7475 we could just check crtl->args.size to see how many bytes are
7476 being passed in argument registers, but it seems that it is unreliable.
7477 Sometimes it will have the value 0 when in fact arguments are being
7478 passed. (See testcase execute/20021111-1.c for an example). So we also
7479 check the args_info.nregs field as well. The problem with this field is
7480 that it makes no allowances for arguments that are passed to the
7481 function but which are not used. Hence we could miss an opportunity
7482 when a function has an unused argument in r3. But it is better to be
7483 safe than to be sorry. */
7484 if (! cfun->machine->uses_anonymous_args
7485 && crtl->args.size >= 0
7486 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7487 && (TARGET_AAPCS_BASED
7488 ? crtl->args.info.aapcs_ncrn < 4
7489 : crtl->args.info.nregs < 4))
7490 return LAST_ARG_REGNUM;
7491
7492 /* Otherwise look for a call-saved register that is going to be pushed. */
7493 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7494 if (pushed_regs_mask & (1 << reg))
7495 return reg;
7496
7497 if (TARGET_THUMB2)
7498 {
7499 /* Thumb-2 can use high regs. */
7500 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7501 if (pushed_regs_mask & (1 << reg))
7502 return reg;
7503 }
7504 /* Something went wrong - thumb_compute_save_reg_mask()
7505 should have arranged for a suitable register to be pushed. */
7506 gcc_unreachable ();
7507 }
7508
7509 static GTY(()) int pic_labelno;
7510
7511 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7512 low register. */
7513
7514 void
7515 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7516 {
7517 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7518
7519 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7520 return;
7521
7522 gcc_assert (flag_pic);
7523
7524 pic_reg = cfun->machine->pic_reg;
7525 if (TARGET_VXWORKS_RTP)
7526 {
7527 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7528 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7529 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7530
7531 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7532
7533 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7534 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7535 }
7536 else
7537 {
7538 /* We use an UNSPEC rather than a LABEL_REF because this label
7539 never appears in the code stream. */
7540
7541 labelno = GEN_INT (pic_labelno++);
7542 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7543 l1 = gen_rtx_CONST (VOIDmode, l1);
7544
7545 /* On the ARM the PC register contains 'dot + 8' at the time of the
7546 addition, on the Thumb it is 'dot + 4'. */
7547 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7548 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7549 UNSPEC_GOTSYM_OFF);
7550 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7551
7552 if (TARGET_32BIT)
7553 {
7554 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7555 }
7556 else /* TARGET_THUMB1 */
7557 {
7558 if (arm_pic_register != INVALID_REGNUM
7559 && REGNO (pic_reg) > LAST_LO_REGNUM)
7560 {
7561 /* We will have pushed the pic register, so we should always be
7562 able to find a work register. */
7563 pic_tmp = gen_rtx_REG (SImode,
7564 thumb_find_work_register (saved_regs));
7565 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7566 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7567 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7568 }
7569 else if (arm_pic_register != INVALID_REGNUM
7570 && arm_pic_register > LAST_LO_REGNUM
7571 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7572 {
7573 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7574 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7575 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7576 }
7577 else
7578 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7579 }
7580 }
7581
7582 /* Need to emit this whether or not we obey regdecls,
7583 since setjmp/longjmp can cause life info to screw up. */
7584 emit_use (pic_reg);
7585 }
7586
7587 /* Generate code to load the address of a static var when flag_pic is set. */
7588 static rtx_insn *
7589 arm_pic_static_addr (rtx orig, rtx reg)
7590 {
7591 rtx l1, labelno, offset_rtx;
7592
7593 gcc_assert (flag_pic);
7594
7595 /* We use an UNSPEC rather than a LABEL_REF because this label
7596 never appears in the code stream. */
7597 labelno = GEN_INT (pic_labelno++);
7598 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7599 l1 = gen_rtx_CONST (VOIDmode, l1);
7600
7601 /* On the ARM the PC register contains 'dot + 8' at the time of the
7602 addition, on the Thumb it is 'dot + 4'. */
7603 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7604 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7605 UNSPEC_SYMBOL_OFFSET);
7606 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7607
7608 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7609 }
7610
7611 /* Return nonzero if X is valid as an ARM state addressing register. */
7612 static int
7613 arm_address_register_rtx_p (rtx x, int strict_p)
7614 {
7615 int regno;
7616
7617 if (!REG_P (x))
7618 return 0;
7619
7620 regno = REGNO (x);
7621
7622 if (strict_p)
7623 return ARM_REGNO_OK_FOR_BASE_P (regno);
7624
7625 return (regno <= LAST_ARM_REGNUM
7626 || regno >= FIRST_PSEUDO_REGISTER
7627 || regno == FRAME_POINTER_REGNUM
7628 || regno == ARG_POINTER_REGNUM);
7629 }
7630
7631 /* Return TRUE if this rtx is the difference of a symbol and a label,
7632 and will reduce to a PC-relative relocation in the object file.
7633 Expressions like this can be left alone when generating PIC, rather
7634 than forced through the GOT. */
7635 static int
7636 pcrel_constant_p (rtx x)
7637 {
7638 if (GET_CODE (x) == MINUS)
7639 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7640
7641 return FALSE;
7642 }
7643
7644 /* Return true if X will surely end up in an index register after next
7645 splitting pass. */
7646 static bool
7647 will_be_in_index_register (const_rtx x)
7648 {
7649 /* arm.md: calculate_pic_address will split this into a register. */
7650 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7651 }
7652
7653 /* Return nonzero if X is a valid ARM state address operand. */
7654 int
7655 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7656 int strict_p)
7657 {
7658 bool use_ldrd;
7659 enum rtx_code code = GET_CODE (x);
7660
7661 if (arm_address_register_rtx_p (x, strict_p))
7662 return 1;
7663
7664 use_ldrd = (TARGET_LDRD
7665 && (mode == DImode || mode == DFmode));
7666
7667 if (code == POST_INC || code == PRE_DEC
7668 || ((code == PRE_INC || code == POST_DEC)
7669 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7670 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7671
7672 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7673 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7674 && GET_CODE (XEXP (x, 1)) == PLUS
7675 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7676 {
7677 rtx addend = XEXP (XEXP (x, 1), 1);
7678
7679 /* Don't allow ldrd post increment by register because it's hard
7680 to fixup invalid register choices. */
7681 if (use_ldrd
7682 && GET_CODE (x) == POST_MODIFY
7683 && REG_P (addend))
7684 return 0;
7685
7686 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7687 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7688 }
7689
7690 /* After reload constants split into minipools will have addresses
7691 from a LABEL_REF. */
7692 else if (reload_completed
7693 && (code == LABEL_REF
7694 || (code == CONST
7695 && GET_CODE (XEXP (x, 0)) == PLUS
7696 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7697 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7698 return 1;
7699
7700 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7701 return 0;
7702
7703 else if (code == PLUS)
7704 {
7705 rtx xop0 = XEXP (x, 0);
7706 rtx xop1 = XEXP (x, 1);
7707
7708 return ((arm_address_register_rtx_p (xop0, strict_p)
7709 && ((CONST_INT_P (xop1)
7710 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7711 || (!strict_p && will_be_in_index_register (xop1))))
7712 || (arm_address_register_rtx_p (xop1, strict_p)
7713 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7714 }
7715
7716 #if 0
7717 /* Reload currently can't handle MINUS, so disable this for now */
7718 else if (GET_CODE (x) == MINUS)
7719 {
7720 rtx xop0 = XEXP (x, 0);
7721 rtx xop1 = XEXP (x, 1);
7722
7723 return (arm_address_register_rtx_p (xop0, strict_p)
7724 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7725 }
7726 #endif
7727
7728 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7729 && code == SYMBOL_REF
7730 && CONSTANT_POOL_ADDRESS_P (x)
7731 && ! (flag_pic
7732 && symbol_mentioned_p (get_pool_constant (x))
7733 && ! pcrel_constant_p (get_pool_constant (x))))
7734 return 1;
7735
7736 return 0;
7737 }
7738
7739 /* Return nonzero if X is a valid Thumb-2 address operand. */
7740 static int
7741 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7742 {
7743 bool use_ldrd;
7744 enum rtx_code code = GET_CODE (x);
7745
7746 if (arm_address_register_rtx_p (x, strict_p))
7747 return 1;
7748
7749 use_ldrd = (TARGET_LDRD
7750 && (mode == DImode || mode == DFmode));
7751
7752 if (code == POST_INC || code == PRE_DEC
7753 || ((code == PRE_INC || code == POST_DEC)
7754 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7755 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7756
7757 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7758 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7759 && GET_CODE (XEXP (x, 1)) == PLUS
7760 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7761 {
7762 /* Thumb-2 only has autoincrement by constant. */
7763 rtx addend = XEXP (XEXP (x, 1), 1);
7764 HOST_WIDE_INT offset;
7765
7766 if (!CONST_INT_P (addend))
7767 return 0;
7768
7769 offset = INTVAL(addend);
7770 if (GET_MODE_SIZE (mode) <= 4)
7771 return (offset > -256 && offset < 256);
7772
7773 return (use_ldrd && offset > -1024 && offset < 1024
7774 && (offset & 3) == 0);
7775 }
7776
7777 /* After reload constants split into minipools will have addresses
7778 from a LABEL_REF. */
7779 else if (reload_completed
7780 && (code == LABEL_REF
7781 || (code == CONST
7782 && GET_CODE (XEXP (x, 0)) == PLUS
7783 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7784 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7785 return 1;
7786
7787 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7788 return 0;
7789
7790 else if (code == PLUS)
7791 {
7792 rtx xop0 = XEXP (x, 0);
7793 rtx xop1 = XEXP (x, 1);
7794
7795 return ((arm_address_register_rtx_p (xop0, strict_p)
7796 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7797 || (!strict_p && will_be_in_index_register (xop1))))
7798 || (arm_address_register_rtx_p (xop1, strict_p)
7799 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7800 }
7801
7802 /* Normally we can assign constant values to target registers without
7803 the help of the constant pool. But there are cases where we have to
7804 use the constant pool, such as:
7805 1) assigning a label to a register.
7806 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7807
7808 A constant pool access of the form:
7809 (set (reg r0) (mem (symbol_ref (".LC0"))))
7810 would cause the use of the literal pool (later, in arm_reorg).
7811 So here we mark such a form as invalid, and the compiler will
7812 instead adjust it into:
7813 (set (reg r0) (symbol_ref (".LC0")))
7814 (set (reg r0) (mem (reg r0))).
7815 No extra register is required, and (mem (reg r0)) won't cause the use
7816 of literal pools. */
7817 else if (arm_disable_literal_pool && code == SYMBOL_REF
7818 && CONSTANT_POOL_ADDRESS_P (x))
7819 return 0;
7820
7821 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7822 && code == SYMBOL_REF
7823 && CONSTANT_POOL_ADDRESS_P (x)
7824 && ! (flag_pic
7825 && symbol_mentioned_p (get_pool_constant (x))
7826 && ! pcrel_constant_p (get_pool_constant (x))))
7827 return 1;
7828
7829 return 0;
7830 }
7831
7832 /* Return nonzero if INDEX is valid for an address index operand in
7833 ARM state. */
7834 static int
7835 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7836 int strict_p)
7837 {
7838 HOST_WIDE_INT range;
7839 enum rtx_code code = GET_CODE (index);
7840
7841 /* Standard coprocessor addressing modes. */
7842 if (TARGET_HARD_FLOAT
7843 && (mode == SFmode || mode == DFmode))
7844 return (code == CONST_INT && INTVAL (index) < 1024
7845 && INTVAL (index) > -1024
7846 && (INTVAL (index) & 3) == 0);
7847
7848 /* For quad modes, we restrict the constant offset to be slightly less
7849 than what the instruction format permits. We do this because for
7850 quad mode moves, we will actually decompose them into two separate
7851 double-mode reads or writes. INDEX must therefore be a valid
7852 (double-mode) offset and so should INDEX+8. */
7853 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7854 return (code == CONST_INT
7855 && INTVAL (index) < 1016
7856 && INTVAL (index) > -1024
7857 && (INTVAL (index) & 3) == 0);
7858
7859 /* We have no such constraint on double mode offsets, so we permit the
7860 full range of the instruction format. */
7861 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7862 return (code == CONST_INT
7863 && INTVAL (index) < 1024
7864 && INTVAL (index) > -1024
7865 && (INTVAL (index) & 3) == 0);
7866
7867 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7868 return (code == CONST_INT
7869 && INTVAL (index) < 1024
7870 && INTVAL (index) > -1024
7871 && (INTVAL (index) & 3) == 0);
7872
7873 if (arm_address_register_rtx_p (index, strict_p)
7874 && (GET_MODE_SIZE (mode) <= 4))
7875 return 1;
7876
7877 if (mode == DImode || mode == DFmode)
7878 {
7879 if (code == CONST_INT)
7880 {
7881 HOST_WIDE_INT val = INTVAL (index);
7882
7883 if (TARGET_LDRD)
7884 return val > -256 && val < 256;
7885 else
7886 return val > -4096 && val < 4092;
7887 }
7888
7889 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7890 }
7891
7892 if (GET_MODE_SIZE (mode) <= 4
7893 && ! (arm_arch4
7894 && (mode == HImode
7895 || mode == HFmode
7896 || (mode == QImode && outer == SIGN_EXTEND))))
7897 {
7898 if (code == MULT)
7899 {
7900 rtx xiop0 = XEXP (index, 0);
7901 rtx xiop1 = XEXP (index, 1);
7902
7903 return ((arm_address_register_rtx_p (xiop0, strict_p)
7904 && power_of_two_operand (xiop1, SImode))
7905 || (arm_address_register_rtx_p (xiop1, strict_p)
7906 && power_of_two_operand (xiop0, SImode)));
7907 }
7908 else if (code == LSHIFTRT || code == ASHIFTRT
7909 || code == ASHIFT || code == ROTATERT)
7910 {
7911 rtx op = XEXP (index, 1);
7912
7913 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7914 && CONST_INT_P (op)
7915 && INTVAL (op) > 0
7916 && INTVAL (op) <= 31);
7917 }
7918 }
7919
7920 /* For ARM v4 we may be doing a sign-extend operation during the
7921 load. */
7922 if (arm_arch4)
7923 {
7924 if (mode == HImode
7925 || mode == HFmode
7926 || (outer == SIGN_EXTEND && mode == QImode))
7927 range = 256;
7928 else
7929 range = 4096;
7930 }
7931 else
7932 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7933
7934 return (code == CONST_INT
7935 && INTVAL (index) < range
7936 && INTVAL (index) > -range);
7937 }
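/* Some addresses accepted by the checks above in ARM state (illustrative
   assembly, registers chosen arbitrarily):

     ldr   r0, [r1, #4095]        SImode immediate, up to +/-4095
     ldrh  r0, [r1, #255]         HImode on ARMv4+, up to +/-255
     ldr   r0, [r1, r2, lsl #2]   register index scaled by a power of two
     ldrd  r0, r1, [r2, #248]     DImode with TARGET_LDRD, up to +/-255
     vldr  d0, [r1, #1020]        DFmode, multiple of 4 up to +/-1020

   The exact limits follow the mode and architecture tests above.  */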
7938
7939 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7940 index operand. i.e. 1, 2, 4 or 8. */
7941 static bool
7942 thumb2_index_mul_operand (rtx op)
7943 {
7944 HOST_WIDE_INT val;
7945
7946 if (!CONST_INT_P (op))
7947 return false;
7948
7949 val = INTVAL(op);
7950 return (val == 1 || val == 2 || val == 4 || val == 8);
7951 }
7952
7953 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7954 static int
7955 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7956 {
7957 enum rtx_code code = GET_CODE (index);
7958
7959 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7960 /* Standard coprocessor addressing modes. */
7961 if (TARGET_HARD_FLOAT
7962 && (mode == SFmode || mode == DFmode))
7963 return (code == CONST_INT && INTVAL (index) < 1024
7964 /* Thumb-2 allows only a > -256 index range for its core register
7965 load/stores. Since we allow SF/DF in core registers, we have
7966 to use the intersection between -256~4096 (core) and -1024~1024
7967 (coprocessor). */
7968 && INTVAL (index) > -256
7969 && (INTVAL (index) & 3) == 0);
7970
7971 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7972 {
7973 /* For DImode assume values will usually live in core regs
7974 and only allow LDRD addressing modes. */
7975 if (!TARGET_LDRD || mode != DImode)
7976 return (code == CONST_INT
7977 && INTVAL (index) < 1024
7978 && INTVAL (index) > -1024
7979 && (INTVAL (index) & 3) == 0);
7980 }
7981
7982 /* For quad modes, we restrict the constant offset to be slightly less
7983 than what the instruction format permits. We do this because for
7984 quad mode moves, we will actually decompose them into two separate
7985 double-mode reads or writes. INDEX must therefore be a valid
7986 (double-mode) offset and so should INDEX+8. */
7987 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7988 return (code == CONST_INT
7989 && INTVAL (index) < 1016
7990 && INTVAL (index) > -1024
7991 && (INTVAL (index) & 3) == 0);
7992
7993 /* We have no such constraint on double mode offsets, so we permit the
7994 full range of the instruction format. */
7995 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7996 return (code == CONST_INT
7997 && INTVAL (index) < 1024
7998 && INTVAL (index) > -1024
7999 && (INTVAL (index) & 3) == 0);
8000
8001 if (arm_address_register_rtx_p (index, strict_p)
8002 && (GET_MODE_SIZE (mode) <= 4))
8003 return 1;
8004
8005 if (mode == DImode || mode == DFmode)
8006 {
8007 if (code == CONST_INT)
8008 {
8009 HOST_WIDE_INT val = INTVAL (index);
8010 /* ??? Can we assume ldrd for thumb2? */
8011 /* Thumb-2 ldrd only has reg+const addressing modes. */
8012 /* ldrd supports offsets of +-1020.
8013 However the ldr fallback does not. */
8014 return val > -256 && val < 256 && (val & 3) == 0;
8015 }
8016 else
8017 return 0;
8018 }
8019
8020 if (code == MULT)
8021 {
8022 rtx xiop0 = XEXP (index, 0);
8023 rtx xiop1 = XEXP (index, 1);
8024
8025 return ((arm_address_register_rtx_p (xiop0, strict_p)
8026 && thumb2_index_mul_operand (xiop1))
8027 || (arm_address_register_rtx_p (xiop1, strict_p)
8028 && thumb2_index_mul_operand (xiop0)));
8029 }
8030 else if (code == ASHIFT)
8031 {
8032 rtx op = XEXP (index, 1);
8033
8034 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8035 && CONST_INT_P (op)
8036 && INTVAL (op) > 0
8037 && INTVAL (op) <= 3);
8038 }
8039
8040 return (code == CONST_INT
8041 && INTVAL (index) < 4096
8042 && INTVAL (index) > -256);
8043 }
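/* Corresponding illustrative Thumb-2 forms accepted above:

     ldr   r0, [r1, #4095]        positive immediate up to 4095
     ldr   r0, [r1, #-255]        negative immediate down to -255
     ldr   r0, [r1, r2, lsl #3]   register index, shift amount 1 to 3
     ldrd  r0, r1, [r2, #252]     DImode, +/-255 and a multiple of 4  */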
8044
8045 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8046 static int
8047 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8048 {
8049 int regno;
8050
8051 if (!REG_P (x))
8052 return 0;
8053
8054 regno = REGNO (x);
8055
8056 if (strict_p)
8057 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8058
8059 return (regno <= LAST_LO_REGNUM
8060 || regno > LAST_VIRTUAL_REGISTER
8061 || regno == FRAME_POINTER_REGNUM
8062 || (GET_MODE_SIZE (mode) >= 4
8063 && (regno == STACK_POINTER_REGNUM
8064 || regno >= FIRST_PSEUDO_REGISTER
8065 || x == hard_frame_pointer_rtx
8066 || x == arg_pointer_rtx)));
8067 }
8068
8069 /* Return nonzero if x is a legitimate index register. This is the case
8070 for any base register that can access a QImode object. */
8071 inline static int
8072 thumb1_index_register_rtx_p (rtx x, int strict_p)
8073 {
8074 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8075 }
8076
8077 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8078
8079 The AP may be eliminated to either the SP or the FP, so we use the
8080 least common denominator, e.g. SImode, and offsets from 0 to 64.
8081
8082 ??? Verify whether the above is the right approach.
8083
8084 ??? Also, the FP may be eliminated to the SP, so perhaps that
8085 needs special handling also.
8086
8087 ??? Look at how the mips16 port solves this problem. It probably uses
8088 better ways to solve some of these problems.
8089
8090 Although it is not incorrect, we don't accept QImode and HImode
8091 addresses based on the frame pointer or arg pointer until the
8092 reload pass starts. This is so that eliminating such addresses
8093 into stack based ones won't produce impossible code. */
8094 int
8095 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8096 {
8097 /* ??? Not clear if this is right. Experiment. */
8098 if (GET_MODE_SIZE (mode) < 4
8099 && !(reload_in_progress || reload_completed)
8100 && (reg_mentioned_p (frame_pointer_rtx, x)
8101 || reg_mentioned_p (arg_pointer_rtx, x)
8102 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8103 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8104 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8105 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8106 return 0;
8107
8108 /* Accept any base register. SP only in SImode or larger. */
8109 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8110 return 1;
8111
8112 /* This is PC relative data before arm_reorg runs. */
8113 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8114 && GET_CODE (x) == SYMBOL_REF
8115 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8116 return 1;
8117
8118 /* This is PC relative data after arm_reorg runs. */
8119 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8120 && reload_completed
8121 && (GET_CODE (x) == LABEL_REF
8122 || (GET_CODE (x) == CONST
8123 && GET_CODE (XEXP (x, 0)) == PLUS
8124 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8125 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8126 return 1;
8127
8128 /* Post-inc indexing only supported for SImode and larger. */
8129 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8130 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8131 return 1;
8132
8133 else if (GET_CODE (x) == PLUS)
8134 {
8135 /* REG+REG address can be any two index registers. */
8136 /* We disallow FRAME+REG addressing since we know that FRAME
8137 will be replaced with STACK, and SP relative addressing only
8138 permits SP+OFFSET. */
8139 if (GET_MODE_SIZE (mode) <= 4
8140 && XEXP (x, 0) != frame_pointer_rtx
8141 && XEXP (x, 1) != frame_pointer_rtx
8142 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8143 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8144 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8145 return 1;
8146
8147 /* REG+const has 5-7 bit offset for non-SP registers. */
8148 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8149 || XEXP (x, 0) == arg_pointer_rtx)
8150 && CONST_INT_P (XEXP (x, 1))
8151 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8152 return 1;
8153
8154 /* REG+const has 10-bit offset for SP, but only SImode and
8155 larger is supported. */
8156 /* ??? Should probably check for DI/DFmode overflow here
8157 just like GO_IF_LEGITIMATE_OFFSET does. */
8158 else if (REG_P (XEXP (x, 0))
8159 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8160 && GET_MODE_SIZE (mode) >= 4
8161 && CONST_INT_P (XEXP (x, 1))
8162 && INTVAL (XEXP (x, 1)) >= 0
8163 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8164 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8165 return 1;
8166
8167 else if (REG_P (XEXP (x, 0))
8168 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8169 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8170 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8171 && REGNO (XEXP (x, 0))
8172 <= LAST_VIRTUAL_POINTER_REGISTER))
8173 && GET_MODE_SIZE (mode) >= 4
8174 && CONST_INT_P (XEXP (x, 1))
8175 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8176 return 1;
8177 }
8178
8179 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8180 && GET_MODE_SIZE (mode) == 4
8181 && GET_CODE (x) == SYMBOL_REF
8182 && CONSTANT_POOL_ADDRESS_P (x)
8183 && ! (flag_pic
8184 && symbol_mentioned_p (get_pool_constant (x))
8185 && ! pcrel_constant_p (get_pool_constant (x))))
8186 return 1;
8187
8188 return 0;
8189 }
8190
8191 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8192 instruction of mode MODE. */
8193 int
8194 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8195 {
8196 switch (GET_MODE_SIZE (mode))
8197 {
8198 case 1:
8199 return val >= 0 && val < 32;
8200
8201 case 2:
8202 return val >= 0 && val < 64 && (val & 1) == 0;
8203
8204 default:
8205 return (val >= 0
8206 && (val + GET_MODE_SIZE (mode)) <= 128
8207 && (val & 3) == 0);
8208 }
8209 }
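/* In concrete terms the ranges above match the 16-bit Thumb load/store
   encodings: byte accesses allow offsets 0 to 31, halfword accesses 0 to
   62 in steps of 2, and word accesses 0 to 124 in steps of 4 (larger
   modes are further limited by the val + size <= 128 check).  */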
8210
8211 bool
8212 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8213 {
8214 if (TARGET_ARM)
8215 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8216 else if (TARGET_THUMB2)
8217 return thumb2_legitimate_address_p (mode, x, strict_p);
8218 else /* if (TARGET_THUMB1) */
8219 return thumb1_legitimate_address_p (mode, x, strict_p);
8220 }
8221
8222 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8223
8224 Given an rtx X being reloaded into a reg required to be
8225 in class CLASS, return the class of reg to actually use.
8226 In general this is just CLASS, but for the Thumb core registers and
8227 immediate constants we prefer a LO_REGS class or a subset. */
8228
8229 static reg_class_t
8230 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8231 {
8232 if (TARGET_32BIT)
8233 return rclass;
8234 else
8235 {
8236 if (rclass == GENERAL_REGS)
8237 return LO_REGS;
8238 else
8239 return rclass;
8240 }
8241 }
8242
8243 /* Build the SYMBOL_REF for __tls_get_addr. */
8244
8245 static GTY(()) rtx tls_get_addr_libfunc;
8246
8247 static rtx
8248 get_tls_get_addr (void)
8249 {
8250 if (!tls_get_addr_libfunc)
8251 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8252 return tls_get_addr_libfunc;
8253 }
8254
8255 rtx
8256 arm_load_tp (rtx target)
8257 {
8258 if (!target)
8259 target = gen_reg_rtx (SImode);
8260
8261 if (TARGET_HARD_TP)
8262 {
8263 /* Can return in any reg. */
8264 emit_insn (gen_load_tp_hard (target));
8265 }
8266 else
8267 {
8268 /* Always returned in r0. Immediately copy the result into a pseudo,
8269 otherwise other uses of r0 (e.g. setting up function arguments) may
8270 clobber the value. */
8271
8272 rtx tmp;
8273
8274 emit_insn (gen_load_tp_soft ());
8275
8276 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8277 emit_move_insn (target, tmp);
8278 }
8279 return target;
8280 }
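/* Concretely (illustrative): with a hardware thread register the
   load_tp_hard pattern typically reads the CP15 thread ID register,
   e.g.

     mrc  p15, 0, r0, c13, c0, 3   @ TPIDRURO

   while the soft variant calls the EABI helper __aeabi_read_tp, which
   returns the thread pointer in r0 as described above.  */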
8281
8282 static rtx
8283 load_tls_operand (rtx x, rtx reg)
8284 {
8285 rtx tmp;
8286
8287 if (reg == NULL_RTX)
8288 reg = gen_reg_rtx (SImode);
8289
8290 tmp = gen_rtx_CONST (SImode, x);
8291
8292 emit_move_insn (reg, tmp);
8293
8294 return reg;
8295 }
8296
8297 static rtx_insn *
8298 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8299 {
8300 rtx label, labelno, sum;
8301
8302 gcc_assert (reloc != TLS_DESCSEQ);
8303 start_sequence ();
8304
8305 labelno = GEN_INT (pic_labelno++);
8306 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8307 label = gen_rtx_CONST (VOIDmode, label);
8308
8309 sum = gen_rtx_UNSPEC (Pmode,
8310 gen_rtvec (4, x, GEN_INT (reloc), label,
8311 GEN_INT (TARGET_ARM ? 8 : 4)),
8312 UNSPEC_TLS);
8313 reg = load_tls_operand (sum, reg);
8314
8315 if (TARGET_ARM)
8316 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8317 else
8318 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8319
8320 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8321 LCT_PURE, /* LCT_CONST? */
8322 Pmode, 1, reg, Pmode);
8323
8324 rtx_insn *insns = get_insns ();
8325 end_sequence ();
8326
8327 return insns;
8328 }
8329
8330 static rtx
8331 arm_tls_descseq_addr (rtx x, rtx reg)
8332 {
8333 rtx labelno = GEN_INT (pic_labelno++);
8334 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8335 rtx sum = gen_rtx_UNSPEC (Pmode,
8336 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8337 gen_rtx_CONST (VOIDmode, label),
8338 GEN_INT (!TARGET_ARM)),
8339 UNSPEC_TLS);
8340 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8341
8342 emit_insn (gen_tlscall (x, labelno));
8343 if (!reg)
8344 reg = gen_reg_rtx (SImode);
8345 else
8346 gcc_assert (REGNO (reg) != R0_REGNUM);
8347
8348 emit_move_insn (reg, reg0);
8349
8350 return reg;
8351 }
8352
8353 rtx
8354 legitimize_tls_address (rtx x, rtx reg)
8355 {
8356 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8357 rtx_insn *insns;
8358 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8359
8360 switch (model)
8361 {
8362 case TLS_MODEL_GLOBAL_DYNAMIC:
8363 if (TARGET_GNU2_TLS)
8364 {
8365 reg = arm_tls_descseq_addr (x, reg);
8366
8367 tp = arm_load_tp (NULL_RTX);
8368
8369 dest = gen_rtx_PLUS (Pmode, tp, reg);
8370 }
8371 else
8372 {
8373 /* Original scheme */
8374 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8375 dest = gen_reg_rtx (Pmode);
8376 emit_libcall_block (insns, dest, ret, x);
8377 }
8378 return dest;
8379
8380 case TLS_MODEL_LOCAL_DYNAMIC:
8381 if (TARGET_GNU2_TLS)
8382 {
8383 reg = arm_tls_descseq_addr (x, reg);
8384
8385 tp = arm_load_tp (NULL_RTX);
8386
8387 dest = gen_rtx_PLUS (Pmode, tp, reg);
8388 }
8389 else
8390 {
8391 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8392
8393 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8394 share the LDM result with other LD model accesses. */
8395 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8396 UNSPEC_TLS);
8397 dest = gen_reg_rtx (Pmode);
8398 emit_libcall_block (insns, dest, ret, eqv);
8399
8400 /* Load the addend. */
8401 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8402 GEN_INT (TLS_LDO32)),
8403 UNSPEC_TLS);
8404 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8405 dest = gen_rtx_PLUS (Pmode, dest, addend);
8406 }
8407 return dest;
8408
8409 case TLS_MODEL_INITIAL_EXEC:
8410 labelno = GEN_INT (pic_labelno++);
8411 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8412 label = gen_rtx_CONST (VOIDmode, label);
8413 sum = gen_rtx_UNSPEC (Pmode,
8414 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8415 GEN_INT (TARGET_ARM ? 8 : 4)),
8416 UNSPEC_TLS);
8417 reg = load_tls_operand (sum, reg);
8418
8419 if (TARGET_ARM)
8420 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8421 else if (TARGET_THUMB2)
8422 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8423 else
8424 {
8425 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8426 emit_move_insn (reg, gen_const_mem (SImode, reg));
8427 }
8428
8429 tp = arm_load_tp (NULL_RTX);
8430
8431 return gen_rtx_PLUS (Pmode, tp, reg);
8432
8433 case TLS_MODEL_LOCAL_EXEC:
8434 tp = arm_load_tp (NULL_RTX);
8435
8436 reg = gen_rtx_UNSPEC (Pmode,
8437 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8438 UNSPEC_TLS);
8439 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8440
8441 return gen_rtx_PLUS (Pmode, tp, reg);
8442
8443 default:
8444 abort ();
8445 }
8446 }
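/* Illustrative mapping of the TLS models handled above for a hypothetical
   __thread int counter:

   - global/local dynamic: call __tls_get_addr (or use the descriptor
     sequence when -mtls-dialect=gnu2 selects TARGET_GNU2_TLS);
   - initial exec: load the symbol's offset via a GOT entry and add the
     thread pointer from arm_load_tp;
   - local exec: add a link-time constant offset to the thread pointer.  */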
8447
8448 /* Try machine-dependent ways of modifying an illegitimate address
8449 to be legitimate. If we find one, return the new, valid address. */
8450 rtx
8451 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8452 {
8453 if (arm_tls_referenced_p (x))
8454 {
8455 rtx addend = NULL;
8456
8457 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8458 {
8459 addend = XEXP (XEXP (x, 0), 1);
8460 x = XEXP (XEXP (x, 0), 0);
8461 }
8462
8463 if (GET_CODE (x) != SYMBOL_REF)
8464 return x;
8465
8466 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8467
8468 x = legitimize_tls_address (x, NULL_RTX);
8469
8470 if (addend)
8471 {
8472 x = gen_rtx_PLUS (SImode, x, addend);
8473 orig_x = x;
8474 }
8475 else
8476 return x;
8477 }
8478
8479 if (!TARGET_ARM)
8480 {
8481 /* TODO: legitimize_address for Thumb2. */
8482 if (TARGET_THUMB2)
8483 return x;
8484 return thumb_legitimize_address (x, orig_x, mode);
8485 }
8486
8487 if (GET_CODE (x) == PLUS)
8488 {
8489 rtx xop0 = XEXP (x, 0);
8490 rtx xop1 = XEXP (x, 1);
8491
8492 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8493 xop0 = force_reg (SImode, xop0);
8494
8495 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8496 && !symbol_mentioned_p (xop1))
8497 xop1 = force_reg (SImode, xop1);
8498
8499 if (ARM_BASE_REGISTER_RTX_P (xop0)
8500 && CONST_INT_P (xop1))
8501 {
8502 HOST_WIDE_INT n, low_n;
8503 rtx base_reg, val;
8504 n = INTVAL (xop1);
8505
8506 /* VFP addressing modes actually allow greater offsets, but for
8507 now we just stick with the lowest common denominator. */
8508 if (mode == DImode || mode == DFmode)
8509 {
8510 low_n = n & 0x0f;
8511 n &= ~0x0f;
8512 if (low_n > 4)
8513 {
8514 n += 16;
8515 low_n -= 16;
8516 }
8517 }
8518 else
8519 {
8520 low_n = ((mode) == TImode ? 0
8521 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8522 n -= low_n;
8523 }
8524
8525 base_reg = gen_reg_rtx (SImode);
8526 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8527 emit_move_insn (base_reg, val);
8528 x = plus_constant (Pmode, base_reg, low_n);
8529 }
8530 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8531 x = gen_rtx_PLUS (SImode, xop0, xop1);
8532 }
8533
8534 /* XXX We don't allow MINUS any more -- see comment in
8535 arm_legitimate_address_outer_p (). */
8536 else if (GET_CODE (x) == MINUS)
8537 {
8538 rtx xop0 = XEXP (x, 0);
8539 rtx xop1 = XEXP (x, 1);
8540
8541 if (CONSTANT_P (xop0))
8542 xop0 = force_reg (SImode, xop0);
8543
8544 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8545 xop1 = force_reg (SImode, xop1);
8546
8547 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8548 x = gen_rtx_MINUS (SImode, xop0, xop1);
8549 }
8550
8551 /* Make sure to take full advantage of the pre-indexed addressing mode
8552 with absolute addresses, which often allows the base register to be
8553 factorized for multiple adjacent memory references, and it might
8554 even allow the minipool to be avoided entirely. */
8555 else if (CONST_INT_P (x) && optimize > 0)
8556 {
8557 unsigned int bits;
8558 HOST_WIDE_INT mask, base, index;
8559 rtx base_reg;
8560
8561 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8562 use an 8-bit index. So let's use a 12-bit index for SImode only and
8563 hope that arm_gen_constant will enable ldrb to use more bits. */
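/* Worked example (illustrative): for SImode and x = 0xA004 the mask is
   0xFFF, giving base = 0xA000 and index = 4; bit_count (base) is small,
   so no negative-index adjustment is made and nearby references can
   share the base register.  */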
8564 bits = (mode == SImode) ? 12 : 8;
8565 mask = (1 << bits) - 1;
8566 base = INTVAL (x) & ~mask;
8567 index = INTVAL (x) & mask;
8568 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8569 {
8570 /* It'll most probably be more efficient to generate the base
8571 with more bits set and use a negative index instead. */
8572 base |= mask;
8573 index -= mask;
8574 }
8575 base_reg = force_reg (SImode, GEN_INT (base));
8576 x = plus_constant (Pmode, base_reg, index);
8577 }
8578
8579 if (flag_pic)
8580 {
8581 /* We need to find and carefully transform any SYMBOL and LABEL
8582 references; so go back to the original address expression. */
8583 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8584
8585 if (new_x != orig_x)
8586 x = new_x;
8587 }
8588
8589 return x;
8590 }
8591
8592
8593 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8594 to be legitimate. If we find one, return the new, valid address. */
8595 rtx
8596 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8597 {
8598 if (GET_CODE (x) == PLUS
8599 && CONST_INT_P (XEXP (x, 1))
8600 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8601 || INTVAL (XEXP (x, 1)) < 0))
8602 {
8603 rtx xop0 = XEXP (x, 0);
8604 rtx xop1 = XEXP (x, 1);
8605 HOST_WIDE_INT offset = INTVAL (xop1);
8606
8607 /* Try and fold the offset into a biasing of the base register and
8608 then offsetting that. Don't do this when optimizing for space
8609 since it can cause too many CSEs. */
8610 if (optimize_size && offset >= 0
8611 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8612 {
8613 HOST_WIDE_INT delta;
8614
8615 if (offset >= 256)
8616 delta = offset - (256 - GET_MODE_SIZE (mode));
8617 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8618 delta = 31 * GET_MODE_SIZE (mode);
8619 else
8620 delta = offset & (~31 * GET_MODE_SIZE (mode));
8621
8622 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8623 NULL_RTX);
8624 x = plus_constant (Pmode, xop0, delta);
8625 }
8626 else if (offset < 0 && offset > -256)
8627 /* Small negative offsets are best done with a subtract before the
8628 dereference; forcing these into a register normally takes two
8629 instructions. */
8630 x = force_operand (x, NULL_RTX);
8631 else
8632 {
8633 /* For the remaining cases, force the constant into a register. */
8634 xop1 = force_reg (SImode, xop1);
8635 x = gen_rtx_PLUS (SImode, xop0, xop1);
8636 }
8637 }
8638 else if (GET_CODE (x) == PLUS
8639 && s_register_operand (XEXP (x, 1), SImode)
8640 && !s_register_operand (XEXP (x, 0), SImode))
8641 {
8642 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8643
8644 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8645 }
8646
8647 if (flag_pic)
8648 {
8649 /* We need to find and carefully transform any SYMBOL and LABEL
8650 references; so go back to the original address expression. */
8651 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8652
8653 if (new_x != orig_x)
8654 x = new_x;
8655 }
8656
8657 return x;
8658 }
8659
8660 /* Return TRUE if X contains any TLS symbol references. */
8661
8662 bool
8663 arm_tls_referenced_p (rtx x)
8664 {
8665 if (! TARGET_HAVE_TLS)
8666 return false;
8667
8668 subrtx_iterator::array_type array;
8669 FOR_EACH_SUBRTX (iter, array, x, ALL)
8670 {
8671 const_rtx x = *iter;
8672 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8673 return true;
8674
8675 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8676 TLS offsets, not real symbol references. */
8677 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8678 iter.skip_subrtxes ();
8679 }
8680 return false;
8681 }
8682
8683 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8684
8685 On the ARM, allow any integer (invalid ones are removed later by insn
8686 patterns), nice doubles and symbol_refs which refer to the function's
8687 constant pool XXX.
8688
8689 When generating pic allow anything. */
8690
8691 static bool
8692 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8693 {
8694 return flag_pic || !label_mentioned_p (x);
8695 }
8696
8697 static bool
8698 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8699 {
8700 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8701 RTXs. These RTXs must therefore be accepted for Thumb-1 so that the result
8702 is valid when compiling for ARMv8-M Baseline or later. */
8703 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8704 x = XEXP (x, 0);
8705
8706 return (CONST_INT_P (x)
8707 || CONST_DOUBLE_P (x)
8708 || CONSTANT_ADDRESS_P (x)
8709 || flag_pic);
8710 }
8711
8712 static bool
8713 arm_legitimate_constant_p (machine_mode mode, rtx x)
8714 {
8715 return (!arm_cannot_force_const_mem (mode, x)
8716 && (TARGET_32BIT
8717 ? arm_legitimate_constant_p_1 (mode, x)
8718 : thumb_legitimate_constant_p (mode, x)));
8719 }
8720
8721 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8722
8723 static bool
8724 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8725 {
8726 rtx base, offset;
8727
8728 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8729 {
8730 split_const (x, &base, &offset);
8731 if (GET_CODE (base) == SYMBOL_REF
8732 && !offset_within_block_p (base, INTVAL (offset)))
8733 return true;
8734 }
8735 return arm_tls_referenced_p (x);
8736 }
8737 \f
8738 #define REG_OR_SUBREG_REG(X) \
8739 (REG_P (X) \
8740 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8741
8742 #define REG_OR_SUBREG_RTX(X) \
8743 (REG_P (X) ? (X) : SUBREG_REG (X))
8744
8745 static inline int
8746 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8747 {
8748 machine_mode mode = GET_MODE (x);
8749 int total, words;
8750
8751 switch (code)
8752 {
8753 case ASHIFT:
8754 case ASHIFTRT:
8755 case LSHIFTRT:
8756 case ROTATERT:
8757 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8758
8759 case PLUS:
8760 case MINUS:
8761 case COMPARE:
8762 case NEG:
8763 case NOT:
8764 return COSTS_N_INSNS (1);
8765
8766 case MULT:
8767 if (arm_arch6m && arm_m_profile_small_mul)
8768 return COSTS_N_INSNS (32);
8769
8770 if (CONST_INT_P (XEXP (x, 1)))
8771 {
8772 int cycles = 0;
8773 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8774
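/* Roughly one extra cycle per two bits of the constant multiplier
   (presumably modelling an early-terminating multiplier): a full 16-bit
   constant gives 8 iterations here.  */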
8775 while (i)
8776 {
8777 i >>= 2;
8778 cycles++;
8779 }
8780 return COSTS_N_INSNS (2) + cycles;
8781 }
8782 return COSTS_N_INSNS (1) + 16;
8783
8784 case SET:
8785 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8786 the mode. */
8787 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8788 return (COSTS_N_INSNS (words)
8789 + 4 * ((MEM_P (SET_SRC (x)))
8790 + MEM_P (SET_DEST (x))));
8791
8792 case CONST_INT:
8793 if (outer == SET)
8794 {
8795 if (UINTVAL (x) < 256
8796 /* 16-bit constant. */
8797 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8798 return 0;
8799 if (thumb_shiftable_const (INTVAL (x)))
8800 return COSTS_N_INSNS (2);
8801 return COSTS_N_INSNS (3);
8802 }
8803 else if ((outer == PLUS || outer == COMPARE)
8804 && INTVAL (x) < 256 && INTVAL (x) > -256)
8805 return 0;
8806 else if ((outer == IOR || outer == XOR || outer == AND)
8807 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8808 return COSTS_N_INSNS (1);
8809 else if (outer == AND)
8810 {
8811 int i;
8812 /* This duplicates the tests in the andsi3 expander. */
8813 for (i = 9; i <= 31; i++)
8814 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8815 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8816 return COSTS_N_INSNS (2);
8817 }
8818 else if (outer == ASHIFT || outer == ASHIFTRT
8819 || outer == LSHIFTRT)
8820 return 0;
8821 return COSTS_N_INSNS (2);
8822
8823 case CONST:
8824 case CONST_DOUBLE:
8825 case LABEL_REF:
8826 case SYMBOL_REF:
8827 return COSTS_N_INSNS (3);
8828
8829 case UDIV:
8830 case UMOD:
8831 case DIV:
8832 case MOD:
8833 return 100;
8834
8835 case TRUNCATE:
8836 return 99;
8837
8838 case AND:
8839 case XOR:
8840 case IOR:
8841 /* XXX guess. */
8842 return 8;
8843
8844 case MEM:
8845 /* XXX another guess. */
8846 /* Memory costs quite a lot for the first word, but subsequent words
8847 load at the equivalent of a single insn each. */
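/* For example, a DImode load scores 10 + 4 * ((8 - 1) / 4) = 14 here,
   plus 4 more if it refers to the constant pool.  */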
8848 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8849 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8850 ? 4 : 0));
8851
8852 case IF_THEN_ELSE:
8853 /* XXX a guess. */
8854 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8855 return 14;
8856 return 2;
8857
8858 case SIGN_EXTEND:
8859 case ZERO_EXTEND:
8860 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8861 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8862
8863 if (mode == SImode)
8864 return total;
8865
8866 if (arm_arch6)
8867 return total + COSTS_N_INSNS (1);
8868
8869 /* Assume a two-shift sequence. Increase the cost slightly so
8870 we prefer actual shifts over an extend operation. */
8871 return total + 1 + COSTS_N_INSNS (2);
8872
8873 default:
8874 return 99;
8875 }
8876 }
8877
8878 /* Estimates the size cost of thumb1 instructions.
8879 For now most of the code is copied from thumb1_rtx_costs; we need more
8880 fine-grained tuning when we have more related test cases. */
8881 static inline int
8882 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8883 {
8884 machine_mode mode = GET_MODE (x);
8885 int words, cost;
8886
8887 switch (code)
8888 {
8889 case ASHIFT:
8890 case ASHIFTRT:
8891 case LSHIFTRT:
8892 case ROTATERT:
8893 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8894
8895 case PLUS:
8896 case MINUS:
8897 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8898 patterns defined by RTL expansion, especially for the expansion of
8899 multiplication. */
8900 if ((GET_CODE (XEXP (x, 0)) == MULT
8901 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8902 || (GET_CODE (XEXP (x, 1)) == MULT
8903 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8904 return COSTS_N_INSNS (2);
8905 /* Fall through. */
8906 case COMPARE:
8907 case NEG:
8908 case NOT:
8909 return COSTS_N_INSNS (1);
8910
8911 case MULT:
8912 if (CONST_INT_P (XEXP (x, 1)))
8913 {
8914 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
8915 into a register first. */
8916 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8917 /* For targets that have a very small and high-latency multiply
8918 unit, we prefer to synthesize the multiply with up to 5 instructions,
8919 giving a good balance between size and performance. */
8920 if (arm_arch6m && arm_m_profile_small_mul)
8921 return COSTS_N_INSNS (5);
8922 else
8923 return COSTS_N_INSNS (1) + const_size;
8924 }
8925 return COSTS_N_INSNS (1);
8926
8927 case SET:
8928 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8929 the mode. */
8930 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8931 cost = COSTS_N_INSNS (words);
8932 if (satisfies_constraint_J (SET_SRC (x))
8933 || satisfies_constraint_K (SET_SRC (x))
8934 /* Too big an immediate for a 2-byte MOV; needs MOVT. */
8935 || (CONST_INT_P (SET_SRC (x))
8936 && UINTVAL (SET_SRC (x)) >= 256
8937 && TARGET_HAVE_MOVT
8938 && satisfies_constraint_j (SET_SRC (x)))
8939 /* thumb1_movdi_insn. */
8940 || ((words > 1) && MEM_P (SET_SRC (x))))
8941 cost += COSTS_N_INSNS (1);
8942 return cost;
8943
8944 case CONST_INT:
8945 if (outer == SET)
8946 {
8947 if (UINTVAL (x) < 256)
8948 return COSTS_N_INSNS (1);
8949 /* MOVW is 4 bytes long. */
8950 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8951 return COSTS_N_INSNS (2);
8952 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8953 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8954 return COSTS_N_INSNS (2);
8955 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8956 if (thumb_shiftable_const (INTVAL (x)))
8957 return COSTS_N_INSNS (2);
8958 return COSTS_N_INSNS (3);
8959 }
8960 else if ((outer == PLUS || outer == COMPARE)
8961 && INTVAL (x) < 256 && INTVAL (x) > -256)
8962 return 0;
8963 else if ((outer == IOR || outer == XOR || outer == AND)
8964 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8965 return COSTS_N_INSNS (1);
8966 else if (outer == AND)
8967 {
8968 int i;
8969 /* This duplicates the tests in the andsi3 expander. */
8970 for (i = 9; i <= 31; i++)
8971 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8972 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8973 return COSTS_N_INSNS (2);
8974 }
8975 else if (outer == ASHIFT || outer == ASHIFTRT
8976 || outer == LSHIFTRT)
8977 return 0;
8978 return COSTS_N_INSNS (2);
8979
8980 case CONST:
8981 case CONST_DOUBLE:
8982 case LABEL_REF:
8983 case SYMBOL_REF:
8984 return COSTS_N_INSNS (3);
8985
8986 case UDIV:
8987 case UMOD:
8988 case DIV:
8989 case MOD:
8990 return 100;
8991
8992 case TRUNCATE:
8993 return 99;
8994
8995 case AND:
8996 case XOR:
8997 case IOR:
8998 return COSTS_N_INSNS (1);
8999
9000 case MEM:
9001 return (COSTS_N_INSNS (1)
9002 + COSTS_N_INSNS (1)
9003 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9004 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9005 ? COSTS_N_INSNS (1) : 0));
9006
9007 case IF_THEN_ELSE:
9008 /* XXX a guess. */
9009 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9010 return 14;
9011 return 2;
9012
9013 case ZERO_EXTEND:
9014 /* XXX still guessing. */
9015 switch (GET_MODE (XEXP (x, 0)))
9016 {
9017 case QImode:
9018 return (1 + (mode == DImode ? 4 : 0)
9019 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9020
9021 case HImode:
9022 return (4 + (mode == DImode ? 4 : 0)
9023 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9024
9025 case SImode:
9026 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9027
9028 default:
9029 return 99;
9030 }
9031
9032 default:
9033 return 99;
9034 }
9035 }
9036
9037 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9038 operand, then return the operand that is being shifted. If the shift
9039 is not by a constant, then set *SHIFT_REG to point to the shift-amount operand.
9040 Return NULL if OP is not a shifter operand. */
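/* For instance, (mult (reg) (const_int 4)) is treated as a shift left by
   two: the function returns the (reg) operand and leaves *SHIFT_REG
   untouched.  */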
9041 static rtx
9042 shifter_op_p (rtx op, rtx *shift_reg)
9043 {
9044 enum rtx_code code = GET_CODE (op);
9045
9046 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9047 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9048 return XEXP (op, 0);
9049 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9050 return XEXP (op, 0);
9051 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9052 || code == ASHIFTRT)
9053 {
9054 if (!CONST_INT_P (XEXP (op, 1)))
9055 *shift_reg = XEXP (op, 1);
9056 return XEXP (op, 0);
9057 }
9058
9059 return NULL;
9060 }
9061
9062 static bool
9063 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9064 {
9065 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9066 rtx_code code = GET_CODE (x);
9067 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9068
9069 switch (XINT (x, 1))
9070 {
9071 case UNSPEC_UNALIGNED_LOAD:
9072 /* We can only do unaligned loads into the integer unit, and we can't
9073 use LDM or LDRD. */
9074 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9075 if (speed_p)
9076 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9077 + extra_cost->ldst.load_unaligned);
9078
9079 #ifdef NOT_YET
9080 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9081 ADDR_SPACE_GENERIC, speed_p);
9082 #endif
9083 return true;
9084
9085 case UNSPEC_UNALIGNED_STORE:
9086 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9087 if (speed_p)
9088 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9089 + extra_cost->ldst.store_unaligned);
9090
9091 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9092 #ifdef NOT_YET
9093 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9094 ADDR_SPACE_GENERIC, speed_p);
9095 #endif
9096 return true;
9097
9098 case UNSPEC_VRINTZ:
9099 case UNSPEC_VRINTP:
9100 case UNSPEC_VRINTM:
9101 case UNSPEC_VRINTR:
9102 case UNSPEC_VRINTX:
9103 case UNSPEC_VRINTA:
9104 if (speed_p)
9105 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9106
9107 return true;
9108 default:
9109 *cost = COSTS_N_INSNS (2);
9110 break;
9111 }
9112 return true;
9113 }
9114
9115 /* Cost of a libcall. We assume one insn per argument, an amount for the
9116 call (one insn for -Os) and then one for processing the result. */
9117 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
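/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
   speed and COSTS_N_INSNS (4) when optimizing for size.  */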
9118
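/* Helper for the narrow-mode PLUS/MINUS cases below: if operand IDX of X is
   a left shift usable as a shifter operand, accumulate the operands' costs
   into *COST and return true from the enclosing cost function.  It relies on
   the local variables shift_op, shift_reg, extra_cost and speed_p.  */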
9119 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9120 do \
9121 { \
9122 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9123 if (shift_op != NULL \
9124 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9125 { \
9126 if (shift_reg) \
9127 { \
9128 if (speed_p) \
9129 *cost += extra_cost->alu.arith_shift_reg; \
9130 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9131 ASHIFT, 1, speed_p); \
9132 } \
9133 else if (speed_p) \
9134 *cost += extra_cost->alu.arith_shift; \
9135 \
9136 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9137 ASHIFT, 0, speed_p) \
9138 + rtx_cost (XEXP (x, 1 - IDX), \
9139 GET_MODE (shift_op), \
9140 OP, 1, speed_p)); \
9141 return true; \
9142 } \
9143 } \
9144 while (0);
9145
9146 /* RTX costs. Make an estimate of the cost of executing the operation
9147 X, which is contained within an operation with code OUTER_CODE.
9148 SPEED_P indicates whether the cost desired is the performance cost,
9149 or the size cost. The estimate is stored in COST and the return
9150 value is TRUE if the cost calculation is final, or FALSE if the
9151 caller should recurse through the operands of X to add additional
9152 costs.
9153
9154 We currently make no attempt to model the size savings of Thumb-2
9155 16-bit instructions. At the normal points in compilation where
9156 this code is called we have no measure of whether the condition
9157 flags are live or not, and thus no realistic way to determine what
9158 the size will eventually be. */
9159 static bool
9160 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9161 const struct cpu_cost_table *extra_cost,
9162 int *cost, bool speed_p)
9163 {
9164 machine_mode mode = GET_MODE (x);
9165
9166 *cost = COSTS_N_INSNS (1);
9167
9168 if (TARGET_THUMB1)
9169 {
9170 if (speed_p)
9171 *cost = thumb1_rtx_costs (x, code, outer_code);
9172 else
9173 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9174 return true;
9175 }
9176
9177 switch (code)
9178 {
9179 case SET:
9180 *cost = 0;
9181 /* SET RTXs don't have a mode so we get it from the destination. */
9182 mode = GET_MODE (SET_DEST (x));
9183
9184 if (REG_P (SET_SRC (x))
9185 && REG_P (SET_DEST (x)))
9186 {
9187 /* Assume that most copies can be done with a single insn,
9188 unless we don't have HW FP, in which case everything
9189 larger than word mode will require two insns. */
9190 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9191 && GET_MODE_SIZE (mode) > 4)
9192 || mode == DImode)
9193 ? 2 : 1);
9194 /* Conditional register moves can be encoded
9195 in 16 bits in Thumb mode. */
9196 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9197 *cost >>= 1;
9198
9199 return true;
9200 }
9201
9202 if (CONST_INT_P (SET_SRC (x)))
9203 {
9204 /* Handle CONST_INT here, since the value doesn't have a mode
9205 and we would otherwise be unable to work out the true cost. */
9206 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9207 0, speed_p);
9208 outer_code = SET;
9209 /* Slightly lower the cost of setting a core reg to a constant.
9210 This helps break up chains and allows for better scheduling. */
9211 if (REG_P (SET_DEST (x))
9212 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9213 *cost -= 1;
9214 x = SET_SRC (x);
9215 /* Immediate moves with an immediate in the range [0, 255] can be
9216 encoded in 16 bits in Thumb mode. */
9217 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9218 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9219 *cost >>= 1;
9220 goto const_int_cost;
9221 }
9222
9223 return false;
9224
9225 case MEM:
9226 /* A memory access costs one insn if the mode is small or the address is
9227 a single register; otherwise it costs one insn per word. */
9228 if (REG_P (XEXP (x, 0)))
9229 *cost = COSTS_N_INSNS (1);
9230 else if (flag_pic
9231 && GET_CODE (XEXP (x, 0)) == PLUS
9232 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9233 /* This will be split into two instructions.
9234 See arm.md:calculate_pic_address. */
9235 *cost = COSTS_N_INSNS (2);
9236 else
9237 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9238
9239 /* For speed optimizations, add the costs of the address and
9240 accessing memory. */
9241 if (speed_p)
9242 #ifdef NOT_YET
9243 *cost += (extra_cost->ldst.load
9244 + arm_address_cost (XEXP (x, 0), mode,
9245 ADDR_SPACE_GENERIC, speed_p));
9246 #else
9247 *cost += extra_cost->ldst.load;
9248 #endif
9249 return true;
9250
9251 case PARALLEL:
9252 {
9253 /* Calculations of LDM costs are complex. We assume an initial cost
9254 (ldm_1st) which will load the number of registers mentioned in
9255 ldm_regs_per_insn_1st registers; then each additional
9256 ldm_regs_per_insn_subsequent registers cost one more insn. The
9257 formula for N regs is thus:
9258
9259 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9260 + ldm_regs_per_insn_subsequent - 1)
9261 / ldm_regs_per_insn_subsequent).
9262
9263 Additional costs may also be added for addressing. A similar
9264 formula is used for STM. */
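/* As an illustration, with hypothetical tuning values
   regs_per_insn_1st == 2 and regs_per_insn_subsequent == 2, an
   8-register LDM adds COSTS_N_INSNS ((6 + 2 - 1) / 2), i.e.
   COSTS_N_INSNS (3), on top of the first-instruction cost.  */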
9265
9266 bool is_ldm = load_multiple_operation (x, SImode);
9267 bool is_stm = store_multiple_operation (x, SImode);
9268
9269 if (is_ldm || is_stm)
9270 {
9271 if (speed_p)
9272 {
9273 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9274 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9275 ? extra_cost->ldst.ldm_regs_per_insn_1st
9276 : extra_cost->ldst.stm_regs_per_insn_1st;
9277 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9278 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9279 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9280
9281 *cost += regs_per_insn_1st
9282 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9283 + regs_per_insn_sub - 1)
9284 / regs_per_insn_sub);
9285 return true;
9286 }
9287
9288 }
9289 return false;
9290 }
9291 case DIV:
9292 case UDIV:
9293 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9294 && (mode == SFmode || !TARGET_VFP_SINGLE))
9295 *cost += COSTS_N_INSNS (speed_p
9296 ? extra_cost->fp[mode != SFmode].div : 0);
9297 else if (mode == SImode && TARGET_IDIV)
9298 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9299 else
9300 *cost = LIBCALL_COST (2);
9301 return false; /* All arguments must be in registers. */
9302
9303 case MOD:
9304 /* MOD by a power of 2 can be expanded as:
9305 rsbs r1, r0, #0
9306 and r0, r0, #(n - 1)
9307 and r1, r1, #(n - 1)
9308 rsbpl r0, r1, #0. */
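/* The RSBS/RSBPL pair gives the result the sign of the dividend, matching
   C remainder semantics; e.g. for n == 16 both AND masks are #15.  */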
9309 if (CONST_INT_P (XEXP (x, 1))
9310 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9311 && mode == SImode)
9312 {
9313 *cost += COSTS_N_INSNS (3);
9314
9315 if (speed_p)
9316 *cost += 2 * extra_cost->alu.logical
9317 + extra_cost->alu.arith;
9318 return true;
9319 }
9320
9321 /* Fall-through. */
9322 case UMOD:
9323 *cost = LIBCALL_COST (2);
9324 return false; /* All arguments must be in registers. */
9325
9326 case ROTATE:
9327 if (mode == SImode && REG_P (XEXP (x, 1)))
9328 {
9329 *cost += (COSTS_N_INSNS (1)
9330 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9331 if (speed_p)
9332 *cost += extra_cost->alu.shift_reg;
9333 return true;
9334 }
9335 /* Fall through */
9336 case ROTATERT:
9337 case ASHIFT:
9338 case LSHIFTRT:
9339 case ASHIFTRT:
9340 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9341 {
9342 *cost += (COSTS_N_INSNS (2)
9343 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9344 if (speed_p)
9345 *cost += 2 * extra_cost->alu.shift;
9346 return true;
9347 }
9348 else if (mode == SImode)
9349 {
9350 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9351 /* Slightly disparage register shifts at -Os, but not by much. */
9352 if (!CONST_INT_P (XEXP (x, 1)))
9353 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9354 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9355 return true;
9356 }
9357 else if (GET_MODE_CLASS (mode) == MODE_INT
9358 && GET_MODE_SIZE (mode) < 4)
9359 {
9360 if (code == ASHIFT)
9361 {
9362 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9363 /* Slightly disparage register shifts at -Os, but not by
9364 much. */
9365 if (!CONST_INT_P (XEXP (x, 1)))
9366 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9367 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9368 }
9369 else if (code == LSHIFTRT || code == ASHIFTRT)
9370 {
9371 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9372 {
9373 /* Can use SBFX/UBFX. */
9374 if (speed_p)
9375 *cost += extra_cost->alu.bfx;
9376 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9377 }
9378 else
9379 {
9380 *cost += COSTS_N_INSNS (1);
9381 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9382 if (speed_p)
9383 {
9384 if (CONST_INT_P (XEXP (x, 1)))
9385 *cost += 2 * extra_cost->alu.shift;
9386 else
9387 *cost += (extra_cost->alu.shift
9388 + extra_cost->alu.shift_reg);
9389 }
9390 else
9391 /* Slightly disparage register shifts. */
9392 *cost += !CONST_INT_P (XEXP (x, 1));
9393 }
9394 }
9395 else /* Rotates. */
9396 {
9397 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9398 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9399 if (speed_p)
9400 {
9401 if (CONST_INT_P (XEXP (x, 1)))
9402 *cost += (2 * extra_cost->alu.shift
9403 + extra_cost->alu.log_shift);
9404 else
9405 *cost += (extra_cost->alu.shift
9406 + extra_cost->alu.shift_reg
9407 + extra_cost->alu.log_shift_reg);
9408 }
9409 }
9410 return true;
9411 }
9412
9413 *cost = LIBCALL_COST (2);
9414 return false;
9415
9416 case BSWAP:
9417 if (arm_arch6)
9418 {
9419 if (mode == SImode)
9420 {
9421 if (speed_p)
9422 *cost += extra_cost->alu.rev;
9423
9424 return false;
9425 }
9426 }
9427 else
9428 {
9429 /* No rev instruction available. Look at arm_legacy_rev
9430 and thumb_legacy_rev for the form of RTL used then. */
9431 if (TARGET_THUMB)
9432 {
9433 *cost += COSTS_N_INSNS (9);
9434
9435 if (speed_p)
9436 {
9437 *cost += 6 * extra_cost->alu.shift;
9438 *cost += 3 * extra_cost->alu.logical;
9439 }
9440 }
9441 else
9442 {
9443 *cost += COSTS_N_INSNS (4);
9444
9445 if (speed_p)
9446 {
9447 *cost += 2 * extra_cost->alu.shift;
9448 *cost += extra_cost->alu.arith_shift;
9449 *cost += 2 * extra_cost->alu.logical;
9450 }
9451 }
9452 return true;
9453 }
9454 return false;
9455
9456 case MINUS:
9457 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9458 && (mode == SFmode || !TARGET_VFP_SINGLE))
9459 {
9460 if (GET_CODE (XEXP (x, 0)) == MULT
9461 || GET_CODE (XEXP (x, 1)) == MULT)
9462 {
9463 rtx mul_op0, mul_op1, sub_op;
9464
9465 if (speed_p)
9466 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9467
9468 if (GET_CODE (XEXP (x, 0)) == MULT)
9469 {
9470 mul_op0 = XEXP (XEXP (x, 0), 0);
9471 mul_op1 = XEXP (XEXP (x, 0), 1);
9472 sub_op = XEXP (x, 1);
9473 }
9474 else
9475 {
9476 mul_op0 = XEXP (XEXP (x, 1), 0);
9477 mul_op1 = XEXP (XEXP (x, 1), 1);
9478 sub_op = XEXP (x, 0);
9479 }
9480
9481 /* The first operand of the multiply may be optionally
9482 negated. */
9483 if (GET_CODE (mul_op0) == NEG)
9484 mul_op0 = XEXP (mul_op0, 0);
9485
9486 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9487 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9488 + rtx_cost (sub_op, mode, code, 0, speed_p));
9489
9490 return true;
9491 }
9492
9493 if (speed_p)
9494 *cost += extra_cost->fp[mode != SFmode].addsub;
9495 return false;
9496 }
9497
9498 if (mode == SImode)
9499 {
9500 rtx shift_by_reg = NULL;
9501 rtx shift_op;
9502 rtx non_shift_op;
9503
9504 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9505 if (shift_op == NULL)
9506 {
9507 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9508 non_shift_op = XEXP (x, 0);
9509 }
9510 else
9511 non_shift_op = XEXP (x, 1);
9512
9513 if (shift_op != NULL)
9514 {
9515 if (shift_by_reg != NULL)
9516 {
9517 if (speed_p)
9518 *cost += extra_cost->alu.arith_shift_reg;
9519 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9520 }
9521 else if (speed_p)
9522 *cost += extra_cost->alu.arith_shift;
9523
9524 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9525 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9526 return true;
9527 }
9528
9529 if (arm_arch_thumb2
9530 && GET_CODE (XEXP (x, 1)) == MULT)
9531 {
9532 /* MLS. */
9533 if (speed_p)
9534 *cost += extra_cost->mult[0].add;
9535 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9536 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9537 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9538 return true;
9539 }
9540
9541 if (CONST_INT_P (XEXP (x, 0)))
9542 {
9543 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9544 INTVAL (XEXP (x, 0)), NULL_RTX,
9545 NULL_RTX, 1, 0);
9546 *cost = COSTS_N_INSNS (insns);
9547 if (speed_p)
9548 *cost += insns * extra_cost->alu.arith;
9549 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9550 return true;
9551 }
9552 else if (speed_p)
9553 *cost += extra_cost->alu.arith;
9554
9555 return false;
9556 }
9557
9558 if (GET_MODE_CLASS (mode) == MODE_INT
9559 && GET_MODE_SIZE (mode) < 4)
9560 {
9561 rtx shift_op, shift_reg;
9562 shift_reg = NULL;
9563
9564 /* We check both sides of the MINUS for shifter operands since,
9565 unlike PLUS, it's not commutative. */
9566
9567 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9568 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9569
9570 /* Slightly disparage, as we might need to widen the result. */
9571 *cost += 1;
9572 if (speed_p)
9573 *cost += extra_cost->alu.arith;
9574
9575 if (CONST_INT_P (XEXP (x, 0)))
9576 {
9577 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9578 return true;
9579 }
9580
9581 return false;
9582 }
9583
9584 if (mode == DImode)
9585 {
9586 *cost += COSTS_N_INSNS (1);
9587
9588 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9589 {
9590 rtx op1 = XEXP (x, 1);
9591
9592 if (speed_p)
9593 *cost += 2 * extra_cost->alu.arith;
9594
9595 if (GET_CODE (op1) == ZERO_EXTEND)
9596 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9597 0, speed_p);
9598 else
9599 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9600 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9601 0, speed_p);
9602 return true;
9603 }
9604 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9605 {
9606 if (speed_p)
9607 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9608 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9609 0, speed_p)
9610 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9611 return true;
9612 }
9613 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9614 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9615 {
9616 if (speed_p)
9617 *cost += (extra_cost->alu.arith
9618 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9619 ? extra_cost->alu.arith
9620 : extra_cost->alu.arith_shift));
9621 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9622 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9623 GET_CODE (XEXP (x, 1)), 0, speed_p));
9624 return true;
9625 }
9626
9627 if (speed_p)
9628 *cost += 2 * extra_cost->alu.arith;
9629 return false;
9630 }
9631
9632 /* Vector mode? */
9633
9634 *cost = LIBCALL_COST (2);
9635 return false;
9636
9637 case PLUS:
9638 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9639 && (mode == SFmode || !TARGET_VFP_SINGLE))
9640 {
9641 if (GET_CODE (XEXP (x, 0)) == MULT)
9642 {
9643 rtx mul_op0, mul_op1, add_op;
9644
9645 if (speed_p)
9646 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9647
9648 mul_op0 = XEXP (XEXP (x, 0), 0);
9649 mul_op1 = XEXP (XEXP (x, 0), 1);
9650 add_op = XEXP (x, 1);
9651
9652 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9653 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9654 + rtx_cost (add_op, mode, code, 0, speed_p));
9655
9656 return true;
9657 }
9658
9659 if (speed_p)
9660 *cost += extra_cost->fp[mode != SFmode].addsub;
9661 return false;
9662 }
9663 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9664 {
9665 *cost = LIBCALL_COST (2);
9666 return false;
9667 }
9668
9669 /* Narrow modes can be synthesized in SImode, but the range
9670 of useful sub-operations is limited. Check for shift operations
9671 on one of the operands. Only left shifts can be used in the
9672 narrow modes. */
9673 if (GET_MODE_CLASS (mode) == MODE_INT
9674 && GET_MODE_SIZE (mode) < 4)
9675 {
9676 rtx shift_op, shift_reg;
9677 shift_reg = NULL;
9678
9679 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9680
9681 if (CONST_INT_P (XEXP (x, 1)))
9682 {
9683 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9684 INTVAL (XEXP (x, 1)), NULL_RTX,
9685 NULL_RTX, 1, 0);
9686 *cost = COSTS_N_INSNS (insns);
9687 if (speed_p)
9688 *cost += insns * extra_cost->alu.arith;
9689 /* Slightly penalize a narrow operation as the result may
9690 need widening. */
9691 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9692 return true;
9693 }
9694
9695 /* Slightly penalize a narrow operation as the result may
9696 need widening. */
9697 *cost += 1;
9698 if (speed_p)
9699 *cost += extra_cost->alu.arith;
9700
9701 return false;
9702 }
9703
9704 if (mode == SImode)
9705 {
9706 rtx shift_op, shift_reg;
9707
9708 if (TARGET_INT_SIMD
9709 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9710 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9711 {
9712 /* UXTA[BH] or SXTA[BH]. */
9713 if (speed_p)
9714 *cost += extra_cost->alu.extend_arith;
9715 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9716 0, speed_p)
9717 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9718 return true;
9719 }
9720
9721 shift_reg = NULL;
9722 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9723 if (shift_op != NULL)
9724 {
9725 if (shift_reg)
9726 {
9727 if (speed_p)
9728 *cost += extra_cost->alu.arith_shift_reg;
9729 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9730 }
9731 else if (speed_p)
9732 *cost += extra_cost->alu.arith_shift;
9733
9734 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9735 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9736 return true;
9737 }
9738 if (GET_CODE (XEXP (x, 0)) == MULT)
9739 {
9740 rtx mul_op = XEXP (x, 0);
9741
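/* This matches multiply-accumulates where each multiply operand is either
   a sign-extended half-word or the top half of a register (an arithmetic
   shift right by 16), e.g.
   (plus (mult (sign_extend (reg))
               (ashiftrt (reg) (const_int 16)))
         (reg)).  */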
9742 if (TARGET_DSP_MULTIPLY
9743 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9744 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9745 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9746 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9747 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9748 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9749 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9750 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9751 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9752 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9753 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9754 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9755 == 16))))))
9756 {
9757 /* SMLA[BT][BT]. */
9758 if (speed_p)
9759 *cost += extra_cost->mult[0].extend_add;
9760 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9761 SIGN_EXTEND, 0, speed_p)
9762 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9763 SIGN_EXTEND, 0, speed_p)
9764 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9765 return true;
9766 }
9767
9768 if (speed_p)
9769 *cost += extra_cost->mult[0].add;
9770 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9771 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9772 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9773 return true;
9774 }
9775 if (CONST_INT_P (XEXP (x, 1)))
9776 {
9777 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9778 INTVAL (XEXP (x, 1)), NULL_RTX,
9779 NULL_RTX, 1, 0);
9780 *cost = COSTS_N_INSNS (insns);
9781 if (speed_p)
9782 *cost += insns * extra_cost->alu.arith;
9783 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9784 return true;
9785 }
9786 else if (speed_p)
9787 *cost += extra_cost->alu.arith;
9788
9789 return false;
9790 }
9791
9792 if (mode == DImode)
9793 {
9794 if (arm_arch3m
9795 && GET_CODE (XEXP (x, 0)) == MULT
9796 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9797 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9798 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9799 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9800 {
9801 if (speed_p)
9802 *cost += extra_cost->mult[1].extend_add;
9803 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9804 ZERO_EXTEND, 0, speed_p)
9805 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9806 ZERO_EXTEND, 0, speed_p)
9807 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9808 return true;
9809 }
9810
9811 *cost += COSTS_N_INSNS (1);
9812
9813 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9814 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9815 {
9816 if (speed_p)
9817 *cost += (extra_cost->alu.arith
9818 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9819 ? extra_cost->alu.arith
9820 : extra_cost->alu.arith_shift));
9821
9822 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9823 0, speed_p)
9824 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9825 return true;
9826 }
9827
9828 if (speed_p)
9829 *cost += 2 * extra_cost->alu.arith;
9830 return false;
9831 }
9832
9833 /* Vector mode? */
9834 *cost = LIBCALL_COST (2);
9835 return false;
9836 case IOR:
9837 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9838 {
9839 if (speed_p)
9840 *cost += extra_cost->alu.rev;
9841
9842 return true;
9843 }
9844 /* Fall through. */
9845 case AND: case XOR:
9846 if (mode == SImode)
9847 {
9848 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9849 rtx op0 = XEXP (x, 0);
9850 rtx shift_op, shift_reg;
9851
9852 if (subcode == NOT
9853 && (code == AND
9854 || (code == IOR && TARGET_THUMB2)))
9855 op0 = XEXP (op0, 0);
9856
9857 shift_reg = NULL;
9858 shift_op = shifter_op_p (op0, &shift_reg);
9859 if (shift_op != NULL)
9860 {
9861 if (shift_reg)
9862 {
9863 if (speed_p)
9864 *cost += extra_cost->alu.log_shift_reg;
9865 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9866 }
9867 else if (speed_p)
9868 *cost += extra_cost->alu.log_shift;
9869
9870 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9871 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9872 return true;
9873 }
9874
9875 if (CONST_INT_P (XEXP (x, 1)))
9876 {
9877 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9878 INTVAL (XEXP (x, 1)), NULL_RTX,
9879 NULL_RTX, 1, 0);
9880
9881 *cost = COSTS_N_INSNS (insns);
9882 if (speed_p)
9883 *cost += insns * extra_cost->alu.logical;
9884 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9885 return true;
9886 }
9887
9888 if (speed_p)
9889 *cost += extra_cost->alu.logical;
9890 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9891 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9892 return true;
9893 }
9894
9895 if (mode == DImode)
9896 {
9897 rtx op0 = XEXP (x, 0);
9898 enum rtx_code subcode = GET_CODE (op0);
9899
9900 *cost += COSTS_N_INSNS (1);
9901
9902 if (subcode == NOT
9903 && (code == AND
9904 || (code == IOR && TARGET_THUMB2)))
9905 op0 = XEXP (op0, 0);
9906
9907 if (GET_CODE (op0) == ZERO_EXTEND)
9908 {
9909 if (speed_p)
9910 *cost += 2 * extra_cost->alu.logical;
9911
9912 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9913 0, speed_p)
9914 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9915 return true;
9916 }
9917 else if (GET_CODE (op0) == SIGN_EXTEND)
9918 {
9919 if (speed_p)
9920 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9921
9922 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9923 0, speed_p)
9924 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9925 return true;
9926 }
9927
9928 if (speed_p)
9929 *cost += 2 * extra_cost->alu.logical;
9930
9931 return true;
9932 }
9933 /* Vector mode? */
9934
9935 *cost = LIBCALL_COST (2);
9936 return false;
9937
9938 case MULT:
9939 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9940 && (mode == SFmode || !TARGET_VFP_SINGLE))
9941 {
9942 rtx op0 = XEXP (x, 0);
9943
9944 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9945 op0 = XEXP (op0, 0);
9946
9947 if (speed_p)
9948 *cost += extra_cost->fp[mode != SFmode].mult;
9949
9950 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9951 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9952 return true;
9953 }
9954 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9955 {
9956 *cost = LIBCALL_COST (2);
9957 return false;
9958 }
9959
9960 if (mode == SImode)
9961 {
9962 if (TARGET_DSP_MULTIPLY
9963 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9964 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9965 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9966 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9967 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9968 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9969 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9970 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9971 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9972 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9973 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9974 && (INTVAL (XEXP (XEXP (x, 1), 1))
9975 == 16))))))
9976 {
9977 /* SMUL[TB][TB]. */
9978 if (speed_p)
9979 *cost += extra_cost->mult[0].extend;
9980 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9981 SIGN_EXTEND, 0, speed_p);
9982 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9983 SIGN_EXTEND, 1, speed_p);
9984 return true;
9985 }
9986 if (speed_p)
9987 *cost += extra_cost->mult[0].simple;
9988 return false;
9989 }
9990
9991 if (mode == DImode)
9992 {
9993 if (arm_arch3m
9994 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9995 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9996 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9997 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9998 {
9999 if (speed_p)
10000 *cost += extra_cost->mult[1].extend;
10001 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10002 ZERO_EXTEND, 0, speed_p)
10003 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10004 ZERO_EXTEND, 0, speed_p));
10005 return true;
10006 }
10007
10008 *cost = LIBCALL_COST (2);
10009 return false;
10010 }
10011
10012 /* Vector mode? */
10013 *cost = LIBCALL_COST (2);
10014 return false;
10015
10016 case NEG:
10017 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10018 && (mode == SFmode || !TARGET_VFP_SINGLE))
10019 {
10020 if (GET_CODE (XEXP (x, 0)) == MULT)
10021 {
10022 /* VNMUL. */
10023 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10024 return true;
10025 }
10026
10027 if (speed_p)
10028 *cost += extra_cost->fp[mode != SFmode].neg;
10029
10030 return false;
10031 }
10032 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10033 {
10034 *cost = LIBCALL_COST (1);
10035 return false;
10036 }
10037
10038 if (mode == SImode)
10039 {
10040 if (GET_CODE (XEXP (x, 0)) == ABS)
10041 {
10042 *cost += COSTS_N_INSNS (1);
10043 /* Assume the non-flag-changing variant. */
10044 if (speed_p)
10045 *cost += (extra_cost->alu.log_shift
10046 + extra_cost->alu.arith_shift);
10047 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10048 return true;
10049 }
10050
10051 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10052 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10053 {
10054 *cost += COSTS_N_INSNS (1);
10055 /* No extra cost for MOV imm and MVN imm. */
10056 /* If the comparison op is using the flags, there's no further
10057 cost, otherwise we need to add the cost of the comparison. */
10058 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10059 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10060 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10061 {
10062 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10063 *cost += (COSTS_N_INSNS (1)
10064 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10065 0, speed_p)
10066 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10067 1, speed_p));
10068 if (speed_p)
10069 *cost += extra_cost->alu.arith;
10070 }
10071 return true;
10072 }
10073
10074 if (speed_p)
10075 *cost += extra_cost->alu.arith;
10076 return false;
10077 }
10078
10079 if (GET_MODE_CLASS (mode) == MODE_INT
10080 && GET_MODE_SIZE (mode) < 4)
10081 {
10082 /* Slightly disparage, as we might need an extend operation. */
10083 *cost += 1;
10084 if (speed_p)
10085 *cost += extra_cost->alu.arith;
10086 return false;
10087 }
10088
10089 if (mode == DImode)
10090 {
10091 *cost += COSTS_N_INSNS (1);
10092 if (speed_p)
10093 *cost += 2 * extra_cost->alu.arith;
10094 return false;
10095 }
10096
10097 /* Vector mode? */
10098 *cost = LIBCALL_COST (1);
10099 return false;
10100
10101 case NOT:
10102 if (mode == SImode)
10103 {
10104 rtx shift_op;
10105 rtx shift_reg = NULL;
10106
10107 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10108
10109 if (shift_op)
10110 {
10111 if (shift_reg != NULL)
10112 {
10113 if (speed_p)
10114 *cost += extra_cost->alu.log_shift_reg;
10115 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10116 }
10117 else if (speed_p)
10118 *cost += extra_cost->alu.log_shift;
10119 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10120 return true;
10121 }
10122
10123 if (speed_p)
10124 *cost += extra_cost->alu.logical;
10125 return false;
10126 }
10127 if (mode == DImode)
10128 {
10129 *cost += COSTS_N_INSNS (1);
10130 return false;
10131 }
10132
10133 /* Vector mode? */
10134
10135 *cost += LIBCALL_COST (1);
10136 return false;
10137
10138 case IF_THEN_ELSE:
10139 {
10140 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10141 {
10142 *cost += COSTS_N_INSNS (3);
10143 return true;
10144 }
10145 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10146 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10147
10148 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10149 /* Assume that if one arm of the if_then_else is a register,
10150 that it will be tied with the result and eliminate the
10151 conditional insn. */
10152 if (REG_P (XEXP (x, 1)))
10153 *cost += op2cost;
10154 else if (REG_P (XEXP (x, 2)))
10155 *cost += op1cost;
10156 else
10157 {
10158 if (speed_p)
10159 {
10160 if (extra_cost->alu.non_exec_costs_exec)
10161 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10162 else
10163 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10164 }
10165 else
10166 *cost += op1cost + op2cost;
10167 }
10168 }
10169 return true;
10170
10171 case COMPARE:
10172 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10173 *cost = 0;
10174 else
10175 {
10176 machine_mode op0mode;
10177 /* We'll mostly assume that the cost of a compare is the cost of the
10178 LHS. However, there are some notable exceptions. */
10179
10180 /* Floating point compares are never done as side-effects. */
10181 op0mode = GET_MODE (XEXP (x, 0));
10182 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10183 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10184 {
10185 if (speed_p)
10186 *cost += extra_cost->fp[op0mode != SFmode].compare;
10187
10188 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10189 {
10190 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10191 return true;
10192 }
10193
10194 return false;
10195 }
10196 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10197 {
10198 *cost = LIBCALL_COST (2);
10199 return false;
10200 }
10201
10202 /* DImode compares normally take two insns. */
10203 if (op0mode == DImode)
10204 {
10205 *cost += COSTS_N_INSNS (1);
10206 if (speed_p)
10207 *cost += 2 * extra_cost->alu.arith;
10208 return false;
10209 }
10210
10211 if (op0mode == SImode)
10212 {
10213 rtx shift_op;
10214 rtx shift_reg;
10215
10216 if (XEXP (x, 1) == const0_rtx
10217 && !(REG_P (XEXP (x, 0))
10218 || (GET_CODE (XEXP (x, 0)) == SUBREG
10219 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10220 {
10221 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10222
10223 /* Multiply operations that set the flags are often
10224 significantly more expensive. */
10225 if (speed_p
10226 && GET_CODE (XEXP (x, 0)) == MULT
10227 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10228 *cost += extra_cost->mult[0].flag_setting;
10229
10230 if (speed_p
10231 && GET_CODE (XEXP (x, 0)) == PLUS
10232 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10233 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10234 0), 1), mode))
10235 *cost += extra_cost->mult[0].flag_setting;
10236 return true;
10237 }
10238
10239 shift_reg = NULL;
10240 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10241 if (shift_op != NULL)
10242 {
10243 if (shift_reg != NULL)
10244 {
10245 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10246 1, speed_p);
10247 if (speed_p)
10248 *cost += extra_cost->alu.arith_shift_reg;
10249 }
10250 else if (speed_p)
10251 *cost += extra_cost->alu.arith_shift;
10252 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10253 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10254 return true;
10255 }
10256
10257 if (speed_p)
10258 *cost += extra_cost->alu.arith;
10259 if (CONST_INT_P (XEXP (x, 1))
10260 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10261 {
10262 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10263 return true;
10264 }
10265 return false;
10266 }
10267
10268 /* Vector mode? */
10269
10270 *cost = LIBCALL_COST (2);
10271 return false;
10272 }
10273 return true;
10274
10275 case EQ:
10276 case NE:
10277 case LT:
10278 case LE:
10279 case GT:
10280 case GE:
10281 case LTU:
10282 case LEU:
10283 case GEU:
10284 case GTU:
10285 case ORDERED:
10286 case UNORDERED:
10287 case UNEQ:
10288 case UNLE:
10289 case UNLT:
10290 case UNGE:
10291 case UNGT:
10292 case LTGT:
10293 if (outer_code == SET)
10294 {
10295 /* Is it a store-flag operation? */
10296 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10297 && XEXP (x, 1) == const0_rtx)
10298 {
10299 /* Thumb also needs an IT insn. */
10300 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10301 return true;
10302 }
10303 if (XEXP (x, 1) == const0_rtx)
10304 {
10305 switch (code)
10306 {
10307 case LT:
10308 /* LSR Rd, Rn, #31. */
10309 if (speed_p)
10310 *cost += extra_cost->alu.shift;
10311 break;
10312
10313 case EQ:
10314 /* RSBS T1, Rn, #0
10315 ADC Rd, Rn, T1. */
10316
10317 case NE:
10318 /* SUBS T1, Rn, #1
10319 SBC Rd, Rn, T1. */
10320 *cost += COSTS_N_INSNS (1);
10321 break;
10322
10323 case LE:
10324 /* RSBS T1, Rn, Rn, LSR #31
10325 ADC Rd, Rn, T1. */
10326 *cost += COSTS_N_INSNS (1);
10327 if (speed_p)
10328 *cost += extra_cost->alu.arith_shift;
10329 break;
10330
10331 case GT:
10332 /* RSB Rd, Rn, Rn, ASR #1
10333 LSR Rd, Rd, #31. */
10334 *cost += COSTS_N_INSNS (1);
10335 if (speed_p)
10336 *cost += (extra_cost->alu.arith_shift
10337 + extra_cost->alu.shift);
10338 break;
10339
10340 case GE:
10341 /* ASR Rd, Rn, #31
10342 ADD Rd, Rn, #1. */
10343 *cost += COSTS_N_INSNS (1);
10344 if (speed_p)
10345 *cost += extra_cost->alu.shift;
10346 break;
10347
10348 default:
10349 /* Remaining cases are either meaningless or would take
10350 three insns anyway. */
10351 *cost = COSTS_N_INSNS (3);
10352 break;
10353 }
10354 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10355 return true;
10356 }
10357 else
10358 {
10359 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10360 if (CONST_INT_P (XEXP (x, 1))
10361 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10362 {
10363 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10364 return true;
10365 }
10366
10367 return false;
10368 }
10369 }
10370 /* Not directly inside a set. If it involves the condition code
10371 register it must be the condition for a branch, cond_exec or
10372 I_T_E operation. Since the comparison is performed elsewhere
10373 this is just the control part which has no additional
10374 cost. */
10375 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10376 && XEXP (x, 1) == const0_rtx)
10377 {
10378 *cost = 0;
10379 return true;
10380 }
10381 return false;
10382
10383 case ABS:
10384 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10385 && (mode == SFmode || !TARGET_VFP_SINGLE))
10386 {
10387 if (speed_p)
10388 *cost += extra_cost->fp[mode != SFmode].neg;
10389
10390 return false;
10391 }
10392 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10393 {
10394 *cost = LIBCALL_COST (1);
10395 return false;
10396 }
10397
10398 if (mode == SImode)
10399 {
10400 if (speed_p)
10401 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10402 return false;
10403 }
10404 /* Vector mode? */
10405 *cost = LIBCALL_COST (1);
10406 return false;
10407
10408 case SIGN_EXTEND:
10409 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10410 && MEM_P (XEXP (x, 0)))
10411 {
10412 if (mode == DImode)
10413 *cost += COSTS_N_INSNS (1);
10414
10415 if (!speed_p)
10416 return true;
10417
10418 if (GET_MODE (XEXP (x, 0)) == SImode)
10419 *cost += extra_cost->ldst.load;
10420 else
10421 *cost += extra_cost->ldst.load_sign_extend;
10422
10423 if (mode == DImode)
10424 *cost += extra_cost->alu.shift;
10425
10426 return true;
10427 }
10428
10429 /* Widening from less than 32-bits requires an extend operation. */
10430 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10431 {
10432 /* We have SXTB/SXTH. */
10433 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10434 if (speed_p)
10435 *cost += extra_cost->alu.extend;
10436 }
10437 else if (GET_MODE (XEXP (x, 0)) != SImode)
10438 {
10439 /* Needs two shifts. */
10440 *cost += COSTS_N_INSNS (1);
10441 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10442 if (speed_p)
10443 *cost += 2 * extra_cost->alu.shift;
10444 }
10445
10446 /* Widening beyond 32-bits requires one more insn. */
10447 if (mode == DImode)
10448 {
10449 *cost += COSTS_N_INSNS (1);
10450 if (speed_p)
10451 *cost += extra_cost->alu.shift;
10452 }
10453
10454 return true;
10455
10456 case ZERO_EXTEND:
10457 if ((arm_arch4
10458 || GET_MODE (XEXP (x, 0)) == SImode
10459 || GET_MODE (XEXP (x, 0)) == QImode)
10460 && MEM_P (XEXP (x, 0)))
10461 {
10462 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10463
10464 if (mode == DImode)
10465 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10466
10467 return true;
10468 }
10469
10470 /* Widening from less than 32-bits requires an extend operation. */
10471 if (GET_MODE (XEXP (x, 0)) == QImode)
10472 {
10473 /* UXTB can be a shorter instruction in Thumb2, but it might
10474 be slower than the AND Rd, Rn, #255 alternative. When
10475 optimizing for speed it should never be slower to use
10476 AND, and we don't really model 16-bit vs 32-bit insns
10477 here. */
10478 if (speed_p)
10479 *cost += extra_cost->alu.logical;
10480 }
10481 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10482 {
10483 /* We have UXTB/UXTH. */
10484 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10485 if (speed_p)
10486 *cost += extra_cost->alu.extend;
10487 }
10488 else if (GET_MODE (XEXP (x, 0)) != SImode)
10489 {
10490 /* Needs two shifts. It's marginally preferable to use
10491 shifts rather than two BIC instructions as the second
10492 shift may merge with a subsequent insn as a shifter
10493 op. */
10494 *cost = COSTS_N_INSNS (2);
10495 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10496 if (speed_p)
10497 *cost += 2 * extra_cost->alu.shift;
10498 }
10499
10500 /* Widening beyond 32-bits requires one more insn. */
10501 if (mode == DImode)
10502 {
10503 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10504 }
10505
10506 return true;
10507
10508 case CONST_INT:
10509 *cost = 0;
10510 /* CONST_INT has no mode, so we cannot tell for sure how many
10511 insns are really going to be needed. The best we can do is
10512 look at the value passed. If it fits in SImode, then assume
10513 that's the mode it will be used for. Otherwise assume it
10514 will be used in DImode. */
10515 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10516 mode = SImode;
10517 else
10518 mode = DImode;
10519
10520 /* Avoid blowing up in arm_gen_constant (). */
10521 if (!(outer_code == PLUS
10522 || outer_code == AND
10523 || outer_code == IOR
10524 || outer_code == XOR
10525 || outer_code == MINUS))
10526 outer_code = SET;
10527
10528 const_int_cost:
10529 if (mode == SImode)
10530 {
10531 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10532 INTVAL (x), NULL, NULL,
10533 0, 0));
10534 /* Extra costs? */
10535 }
10536 else
10537 {
10538 *cost += COSTS_N_INSNS (arm_gen_constant
10539 (outer_code, SImode, NULL,
10540 trunc_int_for_mode (INTVAL (x), SImode),
10541 NULL, NULL, 0, 0)
10542 + arm_gen_constant (outer_code, SImode, NULL,
10543 INTVAL (x) >> 32, NULL,
10544 NULL, 0, 0));
10545 /* Extra costs? */
10546 }
10547
10548 return true;
10549
10550 case CONST:
10551 case LABEL_REF:
10552 case SYMBOL_REF:
10553 if (speed_p)
10554 {
10555 if (arm_arch_thumb2 && !flag_pic)
10556 *cost += COSTS_N_INSNS (1);
10557 else
10558 *cost += extra_cost->ldst.load;
10559 }
10560 else
10561 *cost += COSTS_N_INSNS (1);
10562
10563 if (flag_pic)
10564 {
10565 *cost += COSTS_N_INSNS (1);
10566 if (speed_p)
10567 *cost += extra_cost->alu.arith;
10568 }
10569
10570 return true;
10571
10572 case CONST_FIXED:
10573 *cost = COSTS_N_INSNS (4);
10574 /* Fixme. */
10575 return true;
10576
10577 case CONST_DOUBLE:
10578 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10579 && (mode == SFmode || !TARGET_VFP_SINGLE))
10580 {
10581 if (vfp3_const_double_rtx (x))
10582 {
10583 if (speed_p)
10584 *cost += extra_cost->fp[mode == DFmode].fpconst;
10585 return true;
10586 }
10587
10588 if (speed_p)
10589 {
10590 if (mode == DFmode)
10591 *cost += extra_cost->ldst.loadd;
10592 else
10593 *cost += extra_cost->ldst.loadf;
10594 }
10595 else
10596 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10597
10598 return true;
10599 }
10600 *cost = COSTS_N_INSNS (4);
10601 return true;
10602
10603 case CONST_VECTOR:
10604 /* Fixme. */
10605 if (TARGET_NEON
10606 && TARGET_HARD_FLOAT
10607 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10608 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10609 *cost = COSTS_N_INSNS (1);
10610 else
10611 *cost = COSTS_N_INSNS (4);
10612 return true;
10613
10614 case HIGH:
10615 case LO_SUM:
10616 /* When optimizing for size, we prefer constant pool entries to
10617 MOVW/MOVT pairs, so bump the cost of these slightly. */
10618 if (!speed_p)
10619 *cost += 1;
10620 return true;
10621
10622 case CLZ:
10623 if (speed_p)
10624 *cost += extra_cost->alu.clz;
10625 return false;
10626
10627 case SMIN:
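/* smin (x, 0) can be computed as x AND (x ASR #31), which is presumably
   why only a logical-shift cost is added below.  */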
10628 if (XEXP (x, 1) == const0_rtx)
10629 {
10630 if (speed_p)
10631 *cost += extra_cost->alu.log_shift;
10632 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10633 return true;
10634 }
10635 /* Fall through. */
10636 case SMAX:
10637 case UMIN:
10638 case UMAX:
10639 *cost += COSTS_N_INSNS (1);
10640 return false;
10641
10642 case TRUNCATE:
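/* The pattern below is the high word of a widening 32x32->64-bit multiply,
   i.e. (truncate (ashiftrt (mult (extend) (extend)) (const_int 32))),
   e.g. SMULL/UMULL keeping only the high result register.  */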
10643 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10644 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10645 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10646 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10647 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10648 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10649 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10650 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10651 == ZERO_EXTEND))))
10652 {
10653 if (speed_p)
10654 *cost += extra_cost->mult[1].extend;
10655 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10656 ZERO_EXTEND, 0, speed_p)
10657 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10658 ZERO_EXTEND, 0, speed_p));
10659 return true;
10660 }
10661 *cost = LIBCALL_COST (1);
10662 return false;
10663
10664 case UNSPEC_VOLATILE:
10665 case UNSPEC:
10666 return arm_unspec_cost (x, outer_code, speed_p, cost);
10667
10668 case PC:
10669 /* Reading the PC is like reading any other register. Writing it
10670 is more expensive, but we take that into account elsewhere. */
10671 *cost = 0;
10672 return true;
10673
10674 case ZERO_EXTRACT:
10675 /* TODO: Simple zero_extract of bottom bits using AND. */
10676 /* Fall through. */
10677 case SIGN_EXTRACT:
10678 if (arm_arch6
10679 && mode == SImode
10680 && CONST_INT_P (XEXP (x, 1))
10681 && CONST_INT_P (XEXP (x, 2)))
10682 {
10683 if (speed_p)
10684 *cost += extra_cost->alu.bfx;
10685 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10686 return true;
10687 }
10688 /* Without UBFX/SBFX, need to resort to shift operations. */
10689 *cost += COSTS_N_INSNS (1);
10690 if (speed_p)
10691 *cost += 2 * extra_cost->alu.shift;
10692 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10693 return true;
10694
10695 case FLOAT_EXTEND:
10696 if (TARGET_HARD_FLOAT)
10697 {
10698 if (speed_p)
10699 *cost += extra_cost->fp[mode == DFmode].widen;
10700 if (!TARGET_FPU_ARMV8
10701 && GET_MODE (XEXP (x, 0)) == HFmode)
10702 {
10703 /* Pre v8, widening HF->DF is a two-step process, first
10704 widening to SFmode. */
10705 *cost += COSTS_N_INSNS (1);
10706 if (speed_p)
10707 *cost += extra_cost->fp[0].widen;
10708 }
10709 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10710 return true;
10711 }
10712
10713 *cost = LIBCALL_COST (1);
10714 return false;
10715
10716 case FLOAT_TRUNCATE:
10717 if (TARGET_HARD_FLOAT)
10718 {
10719 if (speed_p)
10720 *cost += extra_cost->fp[mode == DFmode].narrow;
10721 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10722 return true;
10723 /* Vector modes? */
10724 }
10725 *cost = LIBCALL_COST (1);
10726 return false;
10727
10728 case FMA:
10729 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10730 {
10731 rtx op0 = XEXP (x, 0);
10732 rtx op1 = XEXP (x, 1);
10733 rtx op2 = XEXP (x, 2);
10734
10735
10736 /* vfms or vfnma. */
10737 if (GET_CODE (op0) == NEG)
10738 op0 = XEXP (op0, 0);
10739
10740 /* vfnms or vfnma. */
10741 if (GET_CODE (op2) == NEG)
10742 op2 = XEXP (op2, 0);
10743
10744 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10745 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10746 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10747
10748 if (speed_p)
10749 	    *cost += extra_cost->fp[mode == DFmode].fma;
10750
10751 return true;
10752 }
10753
10754 *cost = LIBCALL_COST (3);
10755 return false;
10756
10757 case FIX:
10758 case UNSIGNED_FIX:
10759 if (TARGET_HARD_FLOAT)
10760 {
10761 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10762 a vcvt fixed-point conversion. */
10763 if (code == FIX && mode == SImode
10764 && GET_CODE (XEXP (x, 0)) == FIX
10765 && GET_MODE (XEXP (x, 0)) == SFmode
10766 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10767 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10768 > 0)
10769 {
10770 if (speed_p)
10771 *cost += extra_cost->fp[0].toint;
10772
10773 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10774 code, 0, speed_p);
10775 return true;
10776 }
10777
10778 if (GET_MODE_CLASS (mode) == MODE_INT)
10779 {
10780 mode = GET_MODE (XEXP (x, 0));
10781 if (speed_p)
10782 *cost += extra_cost->fp[mode == DFmode].toint;
10783 	      /* Strip off the 'cost' of rounding towards zero.  */
10784 if (GET_CODE (XEXP (x, 0)) == FIX)
10785 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10786 0, speed_p);
10787 else
10788 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10789 /* ??? Increase the cost to deal with transferring from
10790 FP -> CORE registers? */
10791 return true;
10792 }
10793 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10794 && TARGET_FPU_ARMV8)
10795 {
10796 if (speed_p)
10797 *cost += extra_cost->fp[mode == DFmode].roundint;
10798 return false;
10799 }
10800 /* Vector costs? */
10801 }
10802 *cost = LIBCALL_COST (1);
10803 return false;
10804
10805 case FLOAT:
10806 case UNSIGNED_FLOAT:
10807 if (TARGET_HARD_FLOAT)
10808 {
10809 /* ??? Increase the cost to deal with transferring from CORE
10810 -> FP registers? */
10811 if (speed_p)
10812 *cost += extra_cost->fp[mode == DFmode].fromint;
10813 return false;
10814 }
10815 *cost = LIBCALL_COST (1);
10816 return false;
10817
10818 case CALL:
10819 return true;
10820
10821 case ASM_OPERANDS:
10822 {
10823 /* Just a guess. Guess number of instructions in the asm
10824 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10825 though (see PR60663). */
10826 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10827 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10828
10829 *cost = COSTS_N_INSNS (asm_length + num_operands);
10830 return true;
10831 }
10832 default:
10833 if (mode != VOIDmode)
10834 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10835 else
10836 *cost = COSTS_N_INSNS (4); /* Who knows? */
10837 return false;
10838 }
10839 }
10840
10841 #undef HANDLE_NARROW_SHIFT_ARITH
10842
10843 /* RTX costs entry point. */
10844
10845 static bool
10846 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10847 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10848 {
10849 bool result;
10850 int code = GET_CODE (x);
10851 gcc_assert (current_tune->insn_extra_cost);
10852
10853 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10854 (enum rtx_code) outer_code,
10855 current_tune->insn_extra_cost,
10856 total, speed);
10857
10858 if (dump_file && (dump_flags & TDF_DETAILS))
10859 {
10860 print_rtl_single (dump_file, x);
10861 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10862 *total, result ? "final" : "partial");
10863 }
10864 return result;
10865 }
10866
10867 /* All address computations that can be done are free, but rtx cost returns
10868 the same for practically all of them. So we weight the different types
10869 of address here in the order (most pref first):
10870 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10871 static inline int
10872 arm_arm_address_cost (rtx x)
10873 {
10874 enum rtx_code c = GET_CODE (x);
10875
10876 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10877 return 0;
10878 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10879 return 10;
10880
10881 if (c == PLUS)
10882 {
10883 if (CONST_INT_P (XEXP (x, 1)))
10884 return 2;
10885
10886 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10887 return 3;
10888
10889 return 4;
10890 }
10891
10892 return 6;
10893 }
10894
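/* Likewise for Thumb-1: plain register and register-plus-immediate
   addresses are the cheapest forms.  */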
10895 static inline int
10896 arm_thumb_address_cost (rtx x)
10897 {
10898 enum rtx_code c = GET_CODE (x);
10899
10900 if (c == REG)
10901 return 1;
10902 if (c == PLUS
10903 && REG_P (XEXP (x, 0))
10904 && CONST_INT_P (XEXP (x, 1)))
10905 return 1;
10906
10907 return 2;
10908 }
10909
10910 static int
10911 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10912 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10913 {
10914 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10915 }
10916
10917 /* Adjust cost hook for XScale. */
10918 static bool
10919 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10920 int * cost)
10921 {
10922 /* Some true dependencies can have a higher cost depending
10923 on precisely how certain input operands are used. */
10924 if (dep_type == 0
10925 && recog_memoized (insn) >= 0
10926 && recog_memoized (dep) >= 0)
10927 {
10928 int shift_opnum = get_attr_shift (insn);
10929 enum attr_type attr_type = get_attr_type (dep);
10930
10931 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10932 operand for INSN. If we have a shifted input operand and the
10933 instruction we depend on is another ALU instruction, then we may
10934 have to account for an additional stall. */
10935 if (shift_opnum != 0
10936 && (attr_type == TYPE_ALU_SHIFT_IMM
10937 || attr_type == TYPE_ALUS_SHIFT_IMM
10938 || attr_type == TYPE_LOGIC_SHIFT_IMM
10939 || attr_type == TYPE_LOGICS_SHIFT_IMM
10940 || attr_type == TYPE_ALU_SHIFT_REG
10941 || attr_type == TYPE_ALUS_SHIFT_REG
10942 || attr_type == TYPE_LOGIC_SHIFT_REG
10943 || attr_type == TYPE_LOGICS_SHIFT_REG
10944 || attr_type == TYPE_MOV_SHIFT
10945 || attr_type == TYPE_MVN_SHIFT
10946 || attr_type == TYPE_MOV_SHIFT_REG
10947 || attr_type == TYPE_MVN_SHIFT_REG))
10948 {
10949 rtx shifted_operand;
10950 int opno;
10951
10952 /* Get the shifted operand. */
10953 extract_insn (insn);
10954 shifted_operand = recog_data.operand[shift_opnum];
10955
10956 /* Iterate over all the operands in DEP. If we write an operand
10957 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
10958 cost of this dependency. */
10959 extract_insn (dep);
10960 preprocess_constraints (dep);
10961 for (opno = 0; opno < recog_data.n_operands; opno++)
10962 {
10963 /* We can ignore strict inputs. */
10964 if (recog_data.operand_type[opno] == OP_IN)
10965 continue;
10966
10967 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10968 shifted_operand))
10969 {
10970 *cost = 2;
10971 return false;
10972 }
10973 }
10974 }
10975 }
10976 return true;
10977 }
10978
10979 /* Adjust cost hook for Cortex A9. */
10980 static bool
10981 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10982 int * cost)
10983 {
10984 switch (dep_type)
10985 {
10986 case REG_DEP_ANTI:
10987 *cost = 0;
10988 return false;
10989
10990 case REG_DEP_TRUE:
10991 case REG_DEP_OUTPUT:
10992 if (recog_memoized (insn) >= 0
10993 && recog_memoized (dep) >= 0)
10994 {
10995 if (GET_CODE (PATTERN (insn)) == SET)
10996 {
10997 if (GET_MODE_CLASS
10998 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10999 || GET_MODE_CLASS
11000 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11001 {
11002 enum attr_type attr_type_insn = get_attr_type (insn);
11003 enum attr_type attr_type_dep = get_attr_type (dep);
11004
11005 /* By default all dependencies of the form
11006 s0 = s0 <op> s1
11007 s0 = s0 <op> s2
11008 have an extra latency of 1 cycle because
11009 of the input and output dependency in this
11010 		     case.  However this gets modeled as a true
11011 dependency and hence all these checks. */
11012 if (REG_P (SET_DEST (PATTERN (insn)))
11013 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11014 {
11015 /* FMACS is a special case where the dependent
11016 instruction can be issued 3 cycles before
11017 the normal latency in case of an output
11018 dependency. */
11019 if ((attr_type_insn == TYPE_FMACS
11020 || attr_type_insn == TYPE_FMACD)
11021 && (attr_type_dep == TYPE_FMACS
11022 || attr_type_dep == TYPE_FMACD))
11023 {
11024 if (dep_type == REG_DEP_OUTPUT)
11025 *cost = insn_default_latency (dep) - 3;
11026 else
11027 *cost = insn_default_latency (dep);
11028 return false;
11029 }
11030 else
11031 {
11032 if (dep_type == REG_DEP_OUTPUT)
11033 *cost = insn_default_latency (dep) + 1;
11034 else
11035 *cost = insn_default_latency (dep);
11036 }
11037 return false;
11038 }
11039 }
11040 }
11041 }
11042 break;
11043
11044 default:
11045 gcc_unreachable ();
11046 }
11047
11048 return true;
11049 }
11050
11051 /* Adjust cost hook for FA726TE. */
11052 static bool
11053 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11054 int * cost)
11055 {
11056   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11057      followed by a predicated one) has a penalty of 3.  */
11058 if (dep_type == REG_DEP_TRUE
11059 && recog_memoized (insn) >= 0
11060 && recog_memoized (dep) >= 0
11061 && get_attr_conds (dep) == CONDS_SET)
11062 {
11063 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11064 if (get_attr_conds (insn) == CONDS_USE
11065 && get_attr_type (insn) != TYPE_BRANCH)
11066 {
11067 *cost = 3;
11068 return false;
11069 }
11070
11071 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11072 || get_attr_conds (insn) == CONDS_USE)
11073 {
11074 *cost = 0;
11075 return false;
11076 }
11077 }
11078
11079 return true;
11080 }
11081
11082 /* Implement TARGET_REGISTER_MOVE_COST.
11083
11084 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11085 it is typically more expensive than a single memory access. We set
11086 the cost to less than two memory accesses so that floating
11087 point to integer conversion does not go through memory. */
11088
11089 int
11090 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11091 reg_class_t from, reg_class_t to)
11092 {
11093 if (TARGET_32BIT)
11094 {
11095 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11096 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11097 return 15;
11098 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11099 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11100 return 4;
11101 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11102 return 20;
11103 else
11104 return 2;
11105 }
11106 else
11107 {
11108 if (from == HI_REGS || to == HI_REGS)
11109 return 4;
11110 else
11111 return 2;
11112 }
11113 }
11114
11115 /* Implement TARGET_MEMORY_MOVE_COST. */
11116
11117 int
11118 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11119 bool in ATTRIBUTE_UNUSED)
11120 {
11121 if (TARGET_32BIT)
11122 return 10;
11123 else
11124 {
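      /* Thumb-1: scale the cost with the access size, and treat accesses
	 that need a high register as twice as expensive.  */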
11125 if (GET_MODE_SIZE (mode) < 4)
11126 return 8;
11127 else
11128 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11129 }
11130 }
11131
11132 /* Vectorizer cost model implementation. */
11133
11134 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11135 static int
11136 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11137 tree vectype,
11138 int misalign ATTRIBUTE_UNUSED)
11139 {
11140 unsigned elements;
11141
11142 switch (type_of_cost)
11143 {
11144 case scalar_stmt:
11145 return current_tune->vec_costs->scalar_stmt_cost;
11146
11147 case scalar_load:
11148 return current_tune->vec_costs->scalar_load_cost;
11149
11150 case scalar_store:
11151 return current_tune->vec_costs->scalar_store_cost;
11152
11153 case vector_stmt:
11154 return current_tune->vec_costs->vec_stmt_cost;
11155
11156 case vector_load:
11157 return current_tune->vec_costs->vec_align_load_cost;
11158
11159 case vector_store:
11160 return current_tune->vec_costs->vec_store_cost;
11161
11162 case vec_to_scalar:
11163 return current_tune->vec_costs->vec_to_scalar_cost;
11164
11165 case scalar_to_vec:
11166 return current_tune->vec_costs->scalar_to_vec_cost;
11167
11168 case unaligned_load:
11169 return current_tune->vec_costs->vec_unalign_load_cost;
11170
11171 case unaligned_store:
11172 return current_tune->vec_costs->vec_unalign_store_cost;
11173
11174 case cond_branch_taken:
11175 return current_tune->vec_costs->cond_taken_branch_cost;
11176
11177 case cond_branch_not_taken:
11178 return current_tune->vec_costs->cond_not_taken_branch_cost;
11179
11180 case vec_perm:
11181 case vec_promote_demote:
11182 return current_tune->vec_costs->vec_stmt_cost;
11183
11184 case vec_construct:
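      /* Rough estimate: constructing an N-element vector costs about one
	 statement per pair of elements, plus one.  */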
11185 elements = TYPE_VECTOR_SUBPARTS (vectype);
11186 return elements / 2 + 1;
11187
11188 default:
11189 gcc_unreachable ();
11190 }
11191 }
11192
11193 /* Implement targetm.vectorize.add_stmt_cost. */
11194
11195 static unsigned
11196 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11197 struct _stmt_vec_info *stmt_info, int misalign,
11198 enum vect_cost_model_location where)
11199 {
11200 unsigned *cost = (unsigned *) data;
11201 unsigned retval = 0;
11202
11203 if (flag_vect_cost_model)
11204 {
11205 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11206 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11207
11208 /* Statements in an inner loop relative to the loop being
11209 vectorized are weighted more heavily. The value here is
11210 arbitrary and could potentially be improved with analysis. */
11211 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11212 count *= 50; /* FIXME. */
11213
11214 retval = (unsigned) (count * stmt_cost);
11215 cost[where] += retval;
11216 }
11217
11218 return retval;
11219 }
11220
11221 /* Return true if and only if this insn can dual-issue only as older. */
11222 static bool
11223 cortexa7_older_only (rtx_insn *insn)
11224 {
11225 if (recog_memoized (insn) < 0)
11226 return false;
11227
11228 switch (get_attr_type (insn))
11229 {
11230 case TYPE_ALU_DSP_REG:
11231 case TYPE_ALU_SREG:
11232 case TYPE_ALUS_SREG:
11233 case TYPE_LOGIC_REG:
11234 case TYPE_LOGICS_REG:
11235 case TYPE_ADC_REG:
11236 case TYPE_ADCS_REG:
11237 case TYPE_ADR:
11238 case TYPE_BFM:
11239 case TYPE_REV:
11240 case TYPE_MVN_REG:
11241 case TYPE_SHIFT_IMM:
11242 case TYPE_SHIFT_REG:
11243 case TYPE_LOAD_BYTE:
11244 case TYPE_LOAD1:
11245 case TYPE_STORE1:
11246 case TYPE_FFARITHS:
11247 case TYPE_FADDS:
11248 case TYPE_FFARITHD:
11249 case TYPE_FADDD:
11250 case TYPE_FMOV:
11251 case TYPE_F_CVT:
11252 case TYPE_FCMPS:
11253 case TYPE_FCMPD:
11254 case TYPE_FCONSTS:
11255 case TYPE_FCONSTD:
11256 case TYPE_FMULS:
11257 case TYPE_FMACS:
11258 case TYPE_FMULD:
11259 case TYPE_FMACD:
11260 case TYPE_FDIVS:
11261 case TYPE_FDIVD:
11262 case TYPE_F_MRC:
11263 case TYPE_F_MRRC:
11264 case TYPE_F_FLAG:
11265 case TYPE_F_LOADS:
11266 case TYPE_F_STORES:
11267 return true;
11268 default:
11269 return false;
11270 }
11271 }
11272
11273 /* Return true if and only if this insn can dual-issue as younger. */
11274 static bool
11275 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11276 {
11277 if (recog_memoized (insn) < 0)
11278 {
11279 if (verbose > 5)
11280 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11281 return false;
11282 }
11283
11284 switch (get_attr_type (insn))
11285 {
11286 case TYPE_ALU_IMM:
11287 case TYPE_ALUS_IMM:
11288 case TYPE_LOGIC_IMM:
11289 case TYPE_LOGICS_IMM:
11290 case TYPE_EXTEND:
11291 case TYPE_MVN_IMM:
11292 case TYPE_MOV_IMM:
11293 case TYPE_MOV_REG:
11294 case TYPE_MOV_SHIFT:
11295 case TYPE_MOV_SHIFT_REG:
11296 case TYPE_BRANCH:
11297 case TYPE_CALL:
11298 return true;
11299 default:
11300 return false;
11301 }
11302 }
11303
11304
11305 /* Look for an instruction that can dual issue only as an older
11306 instruction, and move it in front of any instructions that can
11307 dual-issue as younger, while preserving the relative order of all
11308    other instructions in the ready list.  This is a heuristic to help
11309 dual-issue in later cycles, by postponing issue of more flexible
11310 instructions. This heuristic may affect dual issue opportunities
11311 in the current cycle. */
11312 static void
11313 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11314 int *n_readyp, int clock)
11315 {
11316 int i;
11317 int first_older_only = -1, first_younger = -1;
11318
11319 if (verbose > 5)
11320 fprintf (file,
11321 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11322 clock,
11323 *n_readyp);
11324
11325 /* Traverse the ready list from the head (the instruction to issue
11326      first), looking for the first instruction that can issue as
11327 younger and the first instruction that can dual-issue only as
11328 older. */
11329 for (i = *n_readyp - 1; i >= 0; i--)
11330 {
11331 rtx_insn *insn = ready[i];
11332 if (cortexa7_older_only (insn))
11333 {
11334 first_older_only = i;
11335 if (verbose > 5)
11336 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11337 break;
11338 }
11339 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11340 first_younger = i;
11341 }
11342
11343 /* Nothing to reorder because either no younger insn found or insn
11344 that can dual-issue only as older appears before any insn that
11345 can dual-issue as younger. */
11346 if (first_younger == -1)
11347 {
11348 if (verbose > 5)
11349 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11350 return;
11351 }
11352
11353 /* Nothing to reorder because no older-only insn in the ready list. */
11354 if (first_older_only == -1)
11355 {
11356 if (verbose > 5)
11357 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11358 return;
11359 }
11360
11361 /* Move first_older_only insn before first_younger. */
11362 if (verbose > 5)
11363 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11364 INSN_UID(ready [first_older_only]),
11365 INSN_UID(ready [first_younger]));
11366 rtx_insn *first_older_only_insn = ready [first_older_only];
11367 for (i = first_older_only; i < first_younger; i++)
11368 {
11369 ready[i] = ready[i+1];
11370 }
11371
11372 ready[i] = first_older_only_insn;
11373 return;
11374 }
11375
11376 /* Implement TARGET_SCHED_REORDER. */
11377 static int
11378 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11379 int clock)
11380 {
11381 switch (arm_tune)
11382 {
11383 case TARGET_CPU_cortexa7:
11384 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11385 break;
11386 default:
11387 /* Do nothing for other cores. */
11388 break;
11389 }
11390
11391 return arm_issue_rate ();
11392 }
11393
11394 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11395 It corrects the value of COST based on the relationship between
11396 INSN and DEP through the dependence LINK. It returns the new
11397 value. There is a per-core adjust_cost hook to adjust scheduler costs
11398 and the per-core hook can choose to completely override the generic
11399 adjust_cost function. Only put bits of code into arm_adjust_cost that
11400 are common across all cores. */
11401 static int
11402 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11403 unsigned int)
11404 {
11405 rtx i_pat, d_pat;
11406
11407 /* When generating Thumb-1 code, we want to place flag-setting operations
11408 close to a conditional branch which depends on them, so that we can
11409 omit the comparison. */
11410 if (TARGET_THUMB1
11411 && dep_type == 0
11412 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11413 && recog_memoized (dep) >= 0
11414 && get_attr_conds (dep) == CONDS_SET)
11415 return 0;
11416
11417 if (current_tune->sched_adjust_cost != NULL)
11418 {
11419 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11420 return cost;
11421 }
11422
11423 /* XXX Is this strictly true? */
11424 if (dep_type == REG_DEP_ANTI
11425 || dep_type == REG_DEP_OUTPUT)
11426 return 0;
11427
11428 /* Call insns don't incur a stall, even if they follow a load. */
11429 if (dep_type == 0
11430 && CALL_P (insn))
11431 return 1;
11432
11433 if ((i_pat = single_set (insn)) != NULL
11434 && MEM_P (SET_SRC (i_pat))
11435 && (d_pat = single_set (dep)) != NULL
11436 && MEM_P (SET_DEST (d_pat)))
11437 {
11438 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11439 /* This is a load after a store, there is no conflict if the load reads
11440 from a cached area. Assume that loads from the stack, and from the
11441 constant pool are cached, and that others will miss. This is a
11442 hack. */
11443
11444 if ((GET_CODE (src_mem) == SYMBOL_REF
11445 && CONSTANT_POOL_ADDRESS_P (src_mem))
11446 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11447 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11448 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11449 return 1;
11450 }
11451
11452 return cost;
11453 }
11454
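/* Return the maximum number of insns that may be placed in a single
   conditionally-executed block; this simply reflects the
   max_insns_skipped tuning parameter.  */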
11455 int
11456 arm_max_conditional_execute (void)
11457 {
11458 return max_insns_skipped;
11459 }
11460
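/* Default branch cost when no core-specific hook applies: on 32-bit cores
   assume branches are relatively expensive (cheaper for Thumb-2 when
   optimizing for size); for Thumb-1 the cost depends only on whether we
   are optimizing at all.  */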
11461 static int
11462 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11463 {
11464 if (TARGET_32BIT)
11465 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11466 else
11467 return (optimize > 0) ? 2 : 0;
11468 }
11469
11470 static int
11471 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11472 {
11473 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11474 }
11475
11476 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11477 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11478 sequences of non-executed instructions in IT blocks probably take the same
11479 amount of time as executed instructions (and the IT instruction itself takes
11480 space in icache). This function was experimentally determined to give good
11481 results on a popular embedded benchmark. */
11482
11483 static int
11484 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11485 {
11486 return (TARGET_32BIT && speed_p) ? 1
11487 : arm_default_branch_cost (speed_p, predictable_p);
11488 }
11489
11490 static int
11491 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11492 {
11493 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11494 }
11495
11496 static bool fp_consts_inited = false;
11497
11498 static REAL_VALUE_TYPE value_fp0;
11499
11500 static void
11501 init_fp_table (void)
11502 {
11503 REAL_VALUE_TYPE r;
11504
11505 r = REAL_VALUE_ATOF ("0", DFmode);
11506 value_fp0 = r;
11507 fp_consts_inited = true;
11508 }
11509
11510 /* Return TRUE if rtx X is a valid immediate FP constant. */
11511 int
11512 arm_const_double_rtx (rtx x)
11513 {
11514 const REAL_VALUE_TYPE *r;
11515
11516 if (!fp_consts_inited)
11517 init_fp_table ();
11518
11519 r = CONST_DOUBLE_REAL_VALUE (x);
11520 if (REAL_VALUE_MINUS_ZERO (*r))
11521 return 0;
11522
11523 if (real_equal (r, &value_fp0))
11524 return 1;
11525
11526 return 0;
11527 }
11528
11529 /* VFPv3 has a fairly wide range of representable immediates, formed from
11530 "quarter-precision" floating-point values. These can be evaluated using this
11531 formula (with ^ for exponentiation):
11532
11533 -1^s * n * 2^-r
11534
11535 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11536 16 <= n <= 31 and 0 <= r <= 7.
11537
11538 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11539
11540 - A (most-significant) is the sign bit.
11541 - BCD are the exponent (encoded as r XOR 3).
11542 - EFGH are the mantissa (encoded as n - 16).
11543 */
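/* For example, 1.0 = +1 * 16 * 2^-4, so s = 0, n = 16, r = 4, giving the
   encoding (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */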
11544
11545 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11546 fconst[sd] instruction, or -1 if X isn't suitable. */
11547 static int
11548 vfp3_const_double_index (rtx x)
11549 {
11550 REAL_VALUE_TYPE r, m;
11551 int sign, exponent;
11552 unsigned HOST_WIDE_INT mantissa, mant_hi;
11553 unsigned HOST_WIDE_INT mask;
11554 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11555 bool fail;
11556
11557 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11558 return -1;
11559
11560 r = *CONST_DOUBLE_REAL_VALUE (x);
11561
11562 /* We can't represent these things, so detect them first. */
11563 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11564 return -1;
11565
11566 /* Extract sign, exponent and mantissa. */
11567 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11568 r = real_value_abs (&r);
11569 exponent = REAL_EXP (&r);
11570 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11571 highest (sign) bit, with a fixed binary point at bit point_pos.
11572 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11573 bits for the mantissa, this may fail (low bits would be lost). */
11574 real_ldexp (&m, &r, point_pos - exponent);
11575 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11576 mantissa = w.elt (0);
11577 mant_hi = w.elt (1);
11578
11579 /* If there are bits set in the low part of the mantissa, we can't
11580 represent this value. */
11581 if (mantissa != 0)
11582 return -1;
11583
11584 /* Now make it so that mantissa contains the most-significant bits, and move
11585 the point_pos to indicate that the least-significant bits have been
11586 discarded. */
11587 point_pos -= HOST_BITS_PER_WIDE_INT;
11588 mantissa = mant_hi;
11589
11590 /* We can permit four significant bits of mantissa only, plus a high bit
11591 which is always 1. */
11592 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11593 if ((mantissa & mask) != 0)
11594 return -1;
11595
11596 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11597 mantissa >>= point_pos - 5;
11598
11599 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11600 floating-point immediate zero with Neon using an integer-zero load, but
11601 that case is handled elsewhere.) */
11602 if (mantissa == 0)
11603 return -1;
11604
11605 gcc_assert (mantissa >= 16 && mantissa <= 31);
11606
11607 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11608 normalized significands are in the range [1, 2). (Our mantissa is shifted
11609 left 4 places at this point relative to normalized IEEE754 values). GCC
11610 internally uses [0.5, 1) (see real.c), so the exponent returned from
11611 REAL_EXP must be altered. */
11612 exponent = 5 - exponent;
11613
11614 if (exponent < 0 || exponent > 7)
11615 return -1;
11616
11617 /* Sign, mantissa and exponent are now in the correct form to plug into the
11618 formula described in the comment above. */
11619 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11620 }
11621
11622 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11623 int
11624 vfp3_const_double_rtx (rtx x)
11625 {
11626 if (!TARGET_VFP3)
11627 return 0;
11628
11629 return vfp3_const_double_index (x) != -1;
11630 }
11631
11632 /* Recognize immediates which can be used in various Neon instructions. Legal
11633 immediates are described by the following table (for VMVN variants, the
11634 bitwise inverse of the constant shown is recognized. In either case, VMOV
11635 is output and the correct instruction to use for a given constant is chosen
11636 by the assembler). The constant shown is replicated across all elements of
11637 the destination vector.
11638
11639 insn elems variant constant (binary)
11640 ---- ----- ------- -----------------
11641 vmov i32 0 00000000 00000000 00000000 abcdefgh
11642 vmov i32 1 00000000 00000000 abcdefgh 00000000
11643 vmov i32 2 00000000 abcdefgh 00000000 00000000
11644 vmov i32 3 abcdefgh 00000000 00000000 00000000
11645 vmov i16 4 00000000 abcdefgh
11646 vmov i16 5 abcdefgh 00000000
11647 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11648 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11649 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11650 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11651 vmvn i16 10 00000000 abcdefgh
11652 vmvn i16 11 abcdefgh 00000000
11653 vmov i32 12 00000000 00000000 abcdefgh 11111111
11654 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11655 vmov i32 14 00000000 abcdefgh 11111111 11111111
11656 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11657 vmov i8 16 abcdefgh
11658 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11659 eeeeeeee ffffffff gggggggg hhhhhhhh
11660 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11661 vmov f32 19 00000000 00000000 00000000 00000000
11662
11663 For case 18, B = !b. Representable values are exactly those accepted by
11664 vfp3_const_double_index, but are output as floating-point numbers rather
11665 than indices.
11666
11667 For case 19, we will change it to vmov.i32 when assembling.
11668
11669 Variants 0-5 (inclusive) may also be used as immediates for the second
11670 operand of VORR/VBIC instructions.
11671
11672 The INVERSE argument causes the bitwise inverse of the given operand to be
11673 recognized instead (used for recognizing legal immediates for the VAND/VORN
11674 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11675 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11676 output, rather than the real insns vbic/vorr).
11677
11678 INVERSE makes no difference to the recognition of float vectors.
11679
11680 The return value is the variant of immediate as shown in the above table, or
11681 -1 if the given value doesn't match any of the listed patterns.
11682 */
11683 static int
11684 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11685 rtx *modconst, int *elementwidth)
11686 {
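  /* CHECK tests whether the byte-expanded constant matches variant CLASS
     from the table above: the predicate TEST must hold at every index that
     is a multiple of STRIDE, and ELSIZE is the element width (in bits)
     recorded for that variant.  */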
11687 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11688 matches = 1; \
11689 for (i = 0; i < idx; i += (STRIDE)) \
11690 if (!(TEST)) \
11691 matches = 0; \
11692 if (matches) \
11693 { \
11694 immtype = (CLASS); \
11695 elsize = (ELSIZE); \
11696 break; \
11697 }
11698
11699 unsigned int i, elsize = 0, idx = 0, n_elts;
11700 unsigned int innersize;
11701 unsigned char bytes[16];
11702 int immtype = -1, matches;
11703 unsigned int invmask = inverse ? 0xff : 0;
11704 bool vector = GET_CODE (op) == CONST_VECTOR;
11705
11706 if (vector)
11707 n_elts = CONST_VECTOR_NUNITS (op);
11708 else
11709 {
11710 n_elts = 1;
11711 if (mode == VOIDmode)
11712 mode = DImode;
11713 }
11714
11715 innersize = GET_MODE_UNIT_SIZE (mode);
11716
11717 /* Vectors of float constants. */
11718 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11719 {
11720 rtx el0 = CONST_VECTOR_ELT (op, 0);
11721
11722 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11723 return -1;
11724
11725 /* FP16 vectors cannot be represented. */
11726 if (GET_MODE_INNER (mode) == HFmode)
11727 return -1;
11728
11729 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11730 are distinct in this context. */
11731 if (!const_vec_duplicate_p (op))
11732 return -1;
11733
11734 if (modconst)
11735 *modconst = CONST_VECTOR_ELT (op, 0);
11736
11737 if (elementwidth)
11738 *elementwidth = 0;
11739
11740 if (el0 == CONST0_RTX (GET_MODE (el0)))
11741 return 19;
11742 else
11743 return 18;
11744 }
11745
11746 /* The tricks done in the code below apply for little-endian vector layout.
11747 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11748 FIXME: Implement logic for big-endian vectors. */
11749 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11750 return -1;
11751
11752 /* Splat vector constant out into a byte vector. */
11753 for (i = 0; i < n_elts; i++)
11754 {
11755 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11756 unsigned HOST_WIDE_INT elpart;
11757
11758 gcc_assert (CONST_INT_P (el));
11759 elpart = INTVAL (el);
11760
11761 for (unsigned int byte = 0; byte < innersize; byte++)
11762 {
11763 bytes[idx++] = (elpart & 0xff) ^ invmask;
11764 elpart >>= BITS_PER_UNIT;
11765 }
11766 }
11767
11768 /* Sanity check. */
11769 gcc_assert (idx == GET_MODE_SIZE (mode));
11770
11771 do
11772 {
11773 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11774 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11775
11776 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11777 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11778
11779 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11780 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11781
11782 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11783 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11784
11785 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11786
11787 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11788
11789 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11790 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11791
11792 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11793 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11794
11795 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11796 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11797
11798 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11799 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11800
11801 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11802
11803 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11804
11805 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11806 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11807
11808 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11809 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11810
11811 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11812 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11813
11814 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11815 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11816
11817 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11818
11819 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11820 && bytes[i] == bytes[(i + 8) % idx]);
11821 }
11822 while (0);
11823
11824 if (immtype == -1)
11825 return -1;
11826
11827 if (elementwidth)
11828 *elementwidth = elsize;
11829
11830 if (modconst)
11831 {
11832 unsigned HOST_WIDE_INT imm = 0;
11833
11834 /* Un-invert bytes of recognized vector, if necessary. */
11835 if (invmask != 0)
11836 for (i = 0; i < idx; i++)
11837 bytes[i] ^= invmask;
11838
11839 if (immtype == 17)
11840 {
11841 /* FIXME: Broken on 32-bit H_W_I hosts. */
11842 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11843
11844 for (i = 0; i < 8; i++)
11845 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11846 << (i * BITS_PER_UNIT);
11847
11848 *modconst = GEN_INT (imm);
11849 }
11850 else
11851 {
11852 unsigned HOST_WIDE_INT imm = 0;
11853
11854 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11855 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11856
11857 *modconst = GEN_INT (imm);
11858 }
11859 }
11860
11861 return immtype;
11862 #undef CHECK
11863 }
11864
11865 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11866 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11867 float elements), and a modified constant (whatever should be output for a
11868 VMOV) in *MODCONST. */
11869
11870 int
11871 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11872 rtx *modconst, int *elementwidth)
11873 {
11874 rtx tmpconst;
11875 int tmpwidth;
11876 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11877
11878 if (retval == -1)
11879 return 0;
11880
11881 if (modconst)
11882 *modconst = tmpconst;
11883
11884 if (elementwidth)
11885 *elementwidth = tmpwidth;
11886
11887 return 1;
11888 }
11889
11890 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11891 the immediate is valid, write a constant suitable for using as an operand
11892 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11893 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11894
11895 int
11896 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11897 rtx *modconst, int *elementwidth)
11898 {
11899 rtx tmpconst;
11900 int tmpwidth;
11901 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11902
11903 if (retval < 0 || retval > 5)
11904 return 0;
11905
11906 if (modconst)
11907 *modconst = tmpconst;
11908
11909 if (elementwidth)
11910 *elementwidth = tmpwidth;
11911
11912 return 1;
11913 }
11914
11915 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11916 the immediate is valid, write a constant suitable for using as an operand
11917 to VSHR/VSHL to *MODCONST and the corresponding element width to
11918    *ELEMENTWIDTH.  ISLEFTSHIFT indicates whether this is a left or right shift,
11919    because the two have different immediate ranges.  */
11920
11921 int
11922 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11923 rtx *modconst, int *elementwidth,
11924 bool isleftshift)
11925 {
11926 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11927 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11928 unsigned HOST_WIDE_INT last_elt = 0;
11929 unsigned HOST_WIDE_INT maxshift;
11930
11931   /* Extract the shift amount; every element of the vector must be the same.  */
11932 for (i = 0; i < n_elts; i++)
11933 {
11934 rtx el = CONST_VECTOR_ELT (op, i);
11935 unsigned HOST_WIDE_INT elpart;
11936
11937 if (CONST_INT_P (el))
11938 elpart = INTVAL (el);
11939 else if (CONST_DOUBLE_P (el))
11940 return 0;
11941 else
11942 gcc_unreachable ();
11943
11944 if (i != 0 && elpart != last_elt)
11945 return 0;
11946
11947 last_elt = elpart;
11948 }
11949
11950 /* Shift less than element size. */
11951 maxshift = innersize * 8;
11952
11953 if (isleftshift)
11954 {
11955 /* Left shift immediate value can be from 0 to <size>-1. */
11956 if (last_elt >= maxshift)
11957 return 0;
11958 }
11959 else
11960 {
11961 /* Right shift immediate value can be from 1 to <size>. */
11962 if (last_elt == 0 || last_elt > maxshift)
11963 return 0;
11964 }
11965
11966 if (elementwidth)
11967 *elementwidth = innersize * 8;
11968
11969 if (modconst)
11970 *modconst = CONST_VECTOR_ELT (op, 0);
11971
11972 return 1;
11973 }
11974
11975 /* Return a string suitable for output of Neon immediate logic operation
11976 MNEM. */
11977
11978 char *
11979 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11980 int inverse, int quad)
11981 {
11982 int width, is_valid;
11983 static char templ[40];
11984
11985 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11986
11987 gcc_assert (is_valid != 0);
11988
11989 if (quad)
11990 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11991 else
11992 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11993
11994 return templ;
11995 }
11996
11997 /* Return a string suitable for output of Neon immediate shift operation
11998 (VSHR or VSHL) MNEM. */
11999
12000 char *
12001 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12002 machine_mode mode, int quad,
12003 bool isleftshift)
12004 {
12005 int width, is_valid;
12006 static char templ[40];
12007
12008 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12009 gcc_assert (is_valid != 0);
12010
12011 if (quad)
12012 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12013 else
12014 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12015
12016 return templ;
12017 }
12018
12019 /* Output a sequence of pairwise operations to implement a reduction.
12020 NOTE: We do "too much work" here, because pairwise operations work on two
12021    registers-worth of operands in one go.  Unfortunately it does not seem
12022    possible to exploit those extra calculations to do the full operation in
   fewer steps.
12023 Although all vector elements of the result but the first are ignored, we
12024 actually calculate the same result in each of the elements. An alternative
12025 such as initially loading a vector with zero to use as each of the second
12026 operands would use up an additional register and take an extra instruction,
12027 for no particular gain. */
12028
12029 void
12030 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12031 rtx (*reduc) (rtx, rtx, rtx))
12032 {
12033 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12034 rtx tmpsum = op1;
12035
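  /* Each pairwise step halves the number of interesting elements, so a
     V4SI reduction, for instance, takes two steps: 4 -> 2 -> 1.  */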
12036 for (i = parts / 2; i >= 1; i /= 2)
12037 {
12038 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12039 emit_insn (reduc (dest, tmpsum, tmpsum));
12040 tmpsum = dest;
12041 }
12042 }
12043
12044 /* If VALS is a vector constant that can be loaded into a register
12045 using VDUP, generate instructions to do so and return an RTX to
12046 assign to the register. Otherwise return NULL_RTX. */
12047
12048 static rtx
12049 neon_vdup_constant (rtx vals)
12050 {
12051 machine_mode mode = GET_MODE (vals);
12052 machine_mode inner_mode = GET_MODE_INNER (mode);
12053 rtx x;
12054
12055 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12056 return NULL_RTX;
12057
12058 if (!const_vec_duplicate_p (vals, &x))
12059 /* The elements are not all the same. We could handle repeating
12060 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12061 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12062 vdup.i16). */
12063 return NULL_RTX;
12064
12065 /* We can load this constant by using VDUP and a constant in a
12066 single ARM register. This will be cheaper than a vector
12067 load. */
12068
12069 x = copy_to_mode_reg (inner_mode, x);
12070 return gen_rtx_VEC_DUPLICATE (mode, x);
12071 }
12072
12073 /* Generate code to load VALS, which is a PARALLEL containing only
12074 constants (for vec_init) or CONST_VECTOR, efficiently into a
12075 register. Returns an RTX to copy into the register, or NULL_RTX
12076    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
12077
12078 rtx
12079 neon_make_constant (rtx vals)
12080 {
12081 machine_mode mode = GET_MODE (vals);
12082 rtx target;
12083 rtx const_vec = NULL_RTX;
12084 int n_elts = GET_MODE_NUNITS (mode);
12085 int n_const = 0;
12086 int i;
12087
12088 if (GET_CODE (vals) == CONST_VECTOR)
12089 const_vec = vals;
12090 else if (GET_CODE (vals) == PARALLEL)
12091 {
12092 /* A CONST_VECTOR must contain only CONST_INTs and
12093 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12094 Only store valid constants in a CONST_VECTOR. */
12095 for (i = 0; i < n_elts; ++i)
12096 {
12097 rtx x = XVECEXP (vals, 0, i);
12098 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12099 n_const++;
12100 }
12101 if (n_const == n_elts)
12102 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12103 }
12104 else
12105 gcc_unreachable ();
12106
12107 if (const_vec != NULL
12108 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12109 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12110 return const_vec;
12111 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12112 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12113 pipeline cycle; creating the constant takes one or two ARM
12114 pipeline cycles. */
12115 return target;
12116 else if (const_vec != NULL_RTX)
12117 /* Load from constant pool. On Cortex-A8 this takes two cycles
12118        (for either double or quad vectors).  We cannot take advantage
12119 of single-cycle VLD1 because we need a PC-relative addressing
12120 mode. */
12121 return const_vec;
12122 else
12123 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12124        We cannot construct an initializer.  */
12125 return NULL_RTX;
12126 }
12127
12128 /* Initialize vector TARGET to VALS. */
12129
12130 void
12131 neon_expand_vector_init (rtx target, rtx vals)
12132 {
12133 machine_mode mode = GET_MODE (target);
12134 machine_mode inner_mode = GET_MODE_INNER (mode);
12135 int n_elts = GET_MODE_NUNITS (mode);
12136 int n_var = 0, one_var = -1;
12137 bool all_same = true;
12138 rtx x, mem;
12139 int i;
12140
12141 for (i = 0; i < n_elts; ++i)
12142 {
12143 x = XVECEXP (vals, 0, i);
12144 if (!CONSTANT_P (x))
12145 ++n_var, one_var = i;
12146
12147 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12148 all_same = false;
12149 }
12150
12151 if (n_var == 0)
12152 {
12153 rtx constant = neon_make_constant (vals);
12154 if (constant != NULL_RTX)
12155 {
12156 emit_move_insn (target, constant);
12157 return;
12158 }
12159 }
12160
12161 /* Splat a single non-constant element if we can. */
12162 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12163 {
12164 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12165 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12166 return;
12167 }
12168
12169 /* One field is non-constant. Load constant then overwrite varying
12170 field. This is more efficient than using the stack. */
12171 if (n_var == 1)
12172 {
12173 rtx copy = copy_rtx (vals);
12174 rtx index = GEN_INT (one_var);
12175
12176 /* Load constant part of vector, substitute neighboring value for
12177 varying element. */
12178 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12179 neon_expand_vector_init (target, copy);
12180
12181 /* Insert variable. */
12182 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12183 switch (mode)
12184 {
12185 case V8QImode:
12186 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12187 break;
12188 case V16QImode:
12189 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12190 break;
12191 case V4HImode:
12192 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12193 break;
12194 case V8HImode:
12195 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12196 break;
12197 case V2SImode:
12198 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12199 break;
12200 case V4SImode:
12201 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12202 break;
12203 case V2SFmode:
12204 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12205 break;
12206 case V4SFmode:
12207 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12208 break;
12209 case V2DImode:
12210 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12211 break;
12212 default:
12213 gcc_unreachable ();
12214 }
12215 return;
12216 }
12217
12218 /* Construct the vector in memory one field at a time
12219 and load the whole vector. */
12220 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12221 for (i = 0; i < n_elts; i++)
12222 emit_move_insn (adjust_address_nv (mem, inner_mode,
12223 i * GET_MODE_SIZE (inner_mode)),
12224 XVECEXP (vals, 0, i));
12225 emit_move_insn (target, mem);
12226 }
12227
12228 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
12229    an error if it doesn't.  EXP indicates the source location, which includes the
12230 inlining history for intrinsics. */
12231
12232 static void
12233 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12234 const_tree exp, const char *desc)
12235 {
12236 HOST_WIDE_INT lane;
12237
12238 gcc_assert (CONST_INT_P (operand));
12239
12240 lane = INTVAL (operand);
12241
12242 if (lane < low || lane >= high)
12243 {
12244 if (exp)
12245 error ("%K%s %wd out of range %wd - %wd",
12246 exp, desc, lane, low, high - 1);
12247 else
12248 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12249 }
12250 }
12251
12252 /* Bounds-check lanes. */
12253
12254 void
12255 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12256 const_tree exp)
12257 {
12258 bounds_check (operand, low, high, exp, "lane");
12259 }
12260
12261 /* Bounds-check constants. */
12262
12263 void
12264 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12265 {
12266 bounds_check (operand, low, high, NULL_TREE, "constant");
12267 }
12268
12269 HOST_WIDE_INT
12270 neon_element_bits (machine_mode mode)
12271 {
12272 return GET_MODE_UNIT_BITSIZE (mode);
12273 }
12274
12275 \f
12276 /* Predicates for `match_operand' and `match_operator'. */
12277
12278 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12279 WB is true if full writeback address modes are allowed and is false
12280 if limited writeback address modes (POST_INC and PRE_DEC) are
12281 allowed. */
12282
12283 int
12284 arm_coproc_mem_operand (rtx op, bool wb)
12285 {
12286 rtx ind;
12287
12288 /* Reject eliminable registers. */
12289 if (! (reload_in_progress || reload_completed || lra_in_progress)
12290 && ( reg_mentioned_p (frame_pointer_rtx, op)
12291 || reg_mentioned_p (arg_pointer_rtx, op)
12292 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12293 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12294 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12295 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12296 return FALSE;
12297
12298 /* Constants are converted into offsets from labels. */
12299 if (!MEM_P (op))
12300 return FALSE;
12301
12302 ind = XEXP (op, 0);
12303
12304 if (reload_completed
12305 && (GET_CODE (ind) == LABEL_REF
12306 || (GET_CODE (ind) == CONST
12307 && GET_CODE (XEXP (ind, 0)) == PLUS
12308 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12309 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12310 return TRUE;
12311
12312 /* Match: (mem (reg)). */
12313 if (REG_P (ind))
12314 return arm_address_register_rtx_p (ind, 0);
12315
12316   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12317 acceptable in any case (subject to verification by
12318 arm_address_register_rtx_p). We need WB to be true to accept
12319 PRE_INC and POST_DEC. */
12320 if (GET_CODE (ind) == POST_INC
12321 || GET_CODE (ind) == PRE_DEC
12322 || (wb
12323 && (GET_CODE (ind) == PRE_INC
12324 || GET_CODE (ind) == POST_DEC)))
12325 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12326
12327 if (wb
12328 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12329 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12330 && GET_CODE (XEXP (ind, 1)) == PLUS
12331 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12332 ind = XEXP (ind, 1);
12333
12334 /* Match:
12335 (plus (reg)
12336 (const)). */
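  /* The offset must be a word-aligned value in the range -1020..1020,
     i.e. what the coprocessor load/store (LDC/STC, VLDR/VSTR) offset
     encoding allows.  */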
12337 if (GET_CODE (ind) == PLUS
12338 && REG_P (XEXP (ind, 0))
12339 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12340 && CONST_INT_P (XEXP (ind, 1))
12341 && INTVAL (XEXP (ind, 1)) > -1024
12342 && INTVAL (XEXP (ind, 1)) < 1024
12343 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12344 return TRUE;
12345
12346 return FALSE;
12347 }
12348
12349 /* Return TRUE if OP is a memory operand which we can load or store a vector
12350 to/from. TYPE is one of the following values:
12351    0 - Vector load/store (vldr)
12352 1 - Core registers (ldm)
12353 2 - Element/structure loads (vld1)
12354 */
12355 int
12356 neon_vector_mem_operand (rtx op, int type, bool strict)
12357 {
12358 rtx ind;
12359
12360 /* Reject eliminable registers. */
12361 if (strict && ! (reload_in_progress || reload_completed)
12362 && (reg_mentioned_p (frame_pointer_rtx, op)
12363 || reg_mentioned_p (arg_pointer_rtx, op)
12364 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12365 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12366 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12367 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12368 return FALSE;
12369
12370 /* Constants are converted into offsets from labels. */
12371 if (!MEM_P (op))
12372 return FALSE;
12373
12374 ind = XEXP (op, 0);
12375
12376 if (reload_completed
12377 && (GET_CODE (ind) == LABEL_REF
12378 || (GET_CODE (ind) == CONST
12379 && GET_CODE (XEXP (ind, 0)) == PLUS
12380 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12381 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12382 return TRUE;
12383
12384 /* Match: (mem (reg)). */
12385 if (REG_P (ind))
12386 return arm_address_register_rtx_p (ind, 0);
12387
12388 /* Allow post-increment with Neon registers. */
12389 if ((type != 1 && GET_CODE (ind) == POST_INC)
12390 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12391 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12392
12393 /* Allow post-increment by register for VLDn */
12394 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12395 && GET_CODE (XEXP (ind, 1)) == PLUS
12396 && REG_P (XEXP (XEXP (ind, 1), 1)))
12397 return true;
12398
12399 /* Match:
12400 (plus (reg)
12401 (const)). */
12402 if (type == 0
12403 && GET_CODE (ind) == PLUS
12404 && REG_P (XEXP (ind, 0))
12405 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12406 && CONST_INT_P (XEXP (ind, 1))
12407 && INTVAL (XEXP (ind, 1)) > -1024
12408 /* For quad modes, we restrict the constant offset to be slightly less
12409 than what the instruction format permits. We have no such constraint
12410 on double mode offsets. (This must match arm_legitimate_index_p.) */
12411 && (INTVAL (XEXP (ind, 1))
12412 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12413 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12414 return TRUE;
12415
12416 return FALSE;
12417 }
12418
12419 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12420 type. */
12421 int
12422 neon_struct_mem_operand (rtx op)
12423 {
12424 rtx ind;
12425
12426 /* Reject eliminable registers. */
12427 if (! (reload_in_progress || reload_completed)
12428 && ( reg_mentioned_p (frame_pointer_rtx, op)
12429 || reg_mentioned_p (arg_pointer_rtx, op)
12430 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12431 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12432 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12433 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12434 return FALSE;
12435
12436 /* Constants are converted into offsets from labels. */
12437 if (!MEM_P (op))
12438 return FALSE;
12439
12440 ind = XEXP (op, 0);
12441
12442 if (reload_completed
12443 && (GET_CODE (ind) == LABEL_REF
12444 || (GET_CODE (ind) == CONST
12445 && GET_CODE (XEXP (ind, 0)) == PLUS
12446 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12447 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12448 return TRUE;
12449
12450 /* Match: (mem (reg)). */
12451 if (REG_P (ind))
12452 return arm_address_register_rtx_p (ind, 0);
12453
12454 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12455 if (GET_CODE (ind) == POST_INC
12456 || GET_CODE (ind) == PRE_DEC)
12457 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12458
12459 return FALSE;
12460 }
12461
12462 /* Return true if X is a register that will be eliminated later on. */
12463 int
12464 arm_eliminable_register (rtx x)
12465 {
12466 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12467 || REGNO (x) == ARG_POINTER_REGNUM
12468 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12469 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12470 }
12471
12472 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12473 coprocessor registers. Otherwise return NO_REGS. */
12474
12475 enum reg_class
12476 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12477 {
12478 if (mode == HFmode)
12479 {
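      /* HFmode needs a core-register scratch unless the FP16 instructions
	 are available and X is already a register or a valid
	 element/structure address.  */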
12480 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12481 return GENERAL_REGS;
12482 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12483 return NO_REGS;
12484 return GENERAL_REGS;
12485 }
12486
12487 /* The neon move patterns handle all legitimate vector and struct
12488 addresses. */
12489 if (TARGET_NEON
12490 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12491 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12492 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12493 || VALID_NEON_STRUCT_MODE (mode)))
12494 return NO_REGS;
12495
12496 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12497 return NO_REGS;
12498
12499 return GENERAL_REGS;
12500 }
12501
12502 /* Values which must be returned in the most-significant end of the return
12503 register. */
12504
12505 static bool
12506 arm_return_in_msb (const_tree valtype)
12507 {
12508 return (TARGET_AAPCS_BASED
12509 && BYTES_BIG_ENDIAN
12510 && (AGGREGATE_TYPE_P (valtype)
12511 || TREE_CODE (valtype) == COMPLEX_TYPE
12512 || FIXED_POINT_TYPE_P (valtype)));
12513 }
12514
12515 /* Return TRUE if X references a SYMBOL_REF. */
12516 int
12517 symbol_mentioned_p (rtx x)
12518 {
12519 const char * fmt;
12520 int i;
12521
12522 if (GET_CODE (x) == SYMBOL_REF)
12523 return 1;
12524
12525 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12526 are constant offsets, not symbols. */
12527 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12528 return 0;
12529
12530 fmt = GET_RTX_FORMAT (GET_CODE (x));
12531
12532 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12533 {
12534 if (fmt[i] == 'E')
12535 {
12536 int j;
12537
12538 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12539 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12540 return 1;
12541 }
12542 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12543 return 1;
12544 }
12545
12546 return 0;
12547 }
12548
12549 /* Return TRUE if X references a LABEL_REF. */
12550 int
12551 label_mentioned_p (rtx x)
12552 {
12553 const char * fmt;
12554 int i;
12555
12556 if (GET_CODE (x) == LABEL_REF)
12557 return 1;
12558
12559 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12560 instruction, but they are constant offsets, not symbols. */
12561 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12562 return 0;
12563
12564 fmt = GET_RTX_FORMAT (GET_CODE (x));
12565 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12566 {
12567 if (fmt[i] == 'E')
12568 {
12569 int j;
12570
12571 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12572 if (label_mentioned_p (XVECEXP (x, i, j)))
12573 return 1;
12574 }
12575 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12576 return 1;
12577 }
12578
12579 return 0;
12580 }
12581
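/* Return TRUE if X contains an UNSPEC_TLS reference, either directly or
   wrapped in a CONST.  */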
12582 int
12583 tls_mentioned_p (rtx x)
12584 {
12585 switch (GET_CODE (x))
12586 {
12587 case CONST:
12588 return tls_mentioned_p (XEXP (x, 0));
12589
12590 case UNSPEC:
12591 if (XINT (x, 1) == UNSPEC_TLS)
12592 return 1;
12593
12594 /* Fall through. */
12595 default:
12596 return 0;
12597 }
12598 }
12599
12600 /* Must not copy any rtx that uses a pc-relative address.
12601 Also, disallow copying of load-exclusive instructions that
12602 may appear after splitting of compare-and-swap-style operations
12603 so as to prevent those loops from being transformed away from their
12604 canonical forms (see PR 69904). */
12605
12606 static bool
12607 arm_cannot_copy_insn_p (rtx_insn *insn)
12608 {
12609 /* The tls call insn cannot be copied, as it is paired with a data
12610 word. */
12611 if (recog_memoized (insn) == CODE_FOR_tlscall)
12612 return true;
12613
12614 subrtx_iterator::array_type array;
12615 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12616 {
12617 const_rtx x = *iter;
12618 if (GET_CODE (x) == UNSPEC
12619 && (XINT (x, 1) == UNSPEC_PIC_BASE
12620 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12621 return true;
12622 }
12623
12624 rtx set = single_set (insn);
12625 if (set)
12626 {
12627 rtx src = SET_SRC (set);
12628 if (GET_CODE (src) == ZERO_EXTEND)
12629 src = XEXP (src, 0);
12630
12631 /* Catch the load-exclusive and load-acquire operations. */
12632 if (GET_CODE (src) == UNSPEC_VOLATILE
12633 && (XINT (src, 1) == VUNSPEC_LL
12634 || XINT (src, 1) == VUNSPEC_LAX))
12635 return true;
12636 }
12637 return false;
12638 }
12639
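/* Return the rtx comparison code that corresponds to the min/max operation
   X (e.g. SMAX -> GE, UMIN -> LEU).  */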
12640 enum rtx_code
12641 minmax_code (rtx x)
12642 {
12643 enum rtx_code code = GET_CODE (x);
12644
12645 switch (code)
12646 {
12647 case SMAX:
12648 return GE;
12649 case SMIN:
12650 return LE;
12651 case UMIN:
12652 return LEU;
12653 case UMAX:
12654 return GEU;
12655 default:
12656 gcc_unreachable ();
12657 }
12658 }
12659
12660 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12661
12662 bool
12663 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12664 int *mask, bool *signed_sat)
12665 {
12666 /* The high bound must be a power of two minus one. */
12667 int log = exact_log2 (INTVAL (hi_bound) + 1);
12668 if (log == -1)
12669 return false;
12670
12671 /* The low bound is either zero (for usat) or one less than the
12672 negation of the high bound (for ssat). */
12673 if (INTVAL (lo_bound) == 0)
12674 {
12675 if (mask)
12676 *mask = log;
12677 if (signed_sat)
12678 *signed_sat = false;
12679
12680 return true;
12681 }
12682
12683 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12684 {
12685 if (mask)
12686 *mask = log + 1;
12687 if (signed_sat)
12688 *signed_sat = true;
12689
12690 return true;
12691 }
12692
12693 return false;
12694 }
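
/* A minimal illustrative sketch of the bound arithmetic checked above (the
   helper below is hypothetical and not used elsewhere in this file): for
   usat the bounds are 0 .. 2**N - 1, giving "usat #N"; for ssat they are
   -2**(N-1) .. 2**(N-1) - 1, giving "ssat #N".  So 0..255 maps to usat #8
   and -128..127 maps to ssat #8.  */

static bool
sat_bounds_example (HOST_WIDE_INT lo, HOST_WIDE_INT hi,
		    int *width, bool *is_signed)
{
  HOST_WIDE_INT n = hi + 1;
  int log = 0;

  /* The high bound must be a power of two minus one.  */
  if (n <= 0 || (n & (n - 1)) != 0)
    return false;
  while ((HOST_WIDE_INT) 1 << log < n)
    log++;

  if (lo == 0)
    {
      *width = log;		/* usat #log.  */
      *is_signed = false;
      return true;
    }
  if (lo == -hi - 1)
    {
      *width = log + 1;		/* ssat #(log + 1).  */
      *is_signed = true;
      return true;
    }
  return false;
}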
12695
12696 /* Return 1 if memory locations A and B are adjacent (4 bytes apart). */
12697 int
12698 adjacent_mem_locations (rtx a, rtx b)
12699 {
12700 /* We don't guarantee to preserve the order of these memory refs. */
12701 if (volatile_refs_p (a) || volatile_refs_p (b))
12702 return 0;
12703
12704 if ((REG_P (XEXP (a, 0))
12705 || (GET_CODE (XEXP (a, 0)) == PLUS
12706 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12707 && (REG_P (XEXP (b, 0))
12708 || (GET_CODE (XEXP (b, 0)) == PLUS
12709 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12710 {
12711 HOST_WIDE_INT val0 = 0, val1 = 0;
12712 rtx reg0, reg1;
12713 int val_diff;
12714
12715 if (GET_CODE (XEXP (a, 0)) == PLUS)
12716 {
12717 reg0 = XEXP (XEXP (a, 0), 0);
12718 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12719 }
12720 else
12721 reg0 = XEXP (a, 0);
12722
12723 if (GET_CODE (XEXP (b, 0)) == PLUS)
12724 {
12725 reg1 = XEXP (XEXP (b, 0), 0);
12726 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12727 }
12728 else
12729 reg1 = XEXP (b, 0);
12730
12731 /* Don't accept any offset that will require multiple
12732 instructions to handle, since this would cause the
12733 arith_adjacentmem pattern to output an overlong sequence. */
12734 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12735 return 0;
12736
12737 /* Don't allow an eliminable register: register elimination can make
12738 the offset too large. */
12739 if (arm_eliminable_register (reg0))
12740 return 0;
12741
12742 val_diff = val1 - val0;
12743
12744 if (arm_ld_sched)
12745 {
12746 /* If the target has load delay slots, then there's no benefit
12747 to using an ldm instruction unless the offset is zero and
12748 we are optimizing for size. */
12749 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12750 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12751 && (val_diff == 4 || val_diff == -4));
12752 }
12753
12754 return ((REGNO (reg0) == REGNO (reg1))
12755 && (val_diff == 4 || val_diff == -4));
12756 }
12757
12758 return 0;
12759 }
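
/* An illustrative sketch of the core adjacency test above, on plain
   (base register number, byte offset) pairs; the helper is hypothetical
   and not used by the code in this file.  For example, [r4, #8] and
   [r4, #12] are adjacent, while [r5, #12] (different base) or [r4, #16]
   (difference of 8) are not.  */

static int
offsets_adjacent_example (int base0, HOST_WIDE_INT off0,
			  int base1, HOST_WIDE_INT off1)
{
  HOST_WIDE_INT diff = off1 - off0;
  return base0 == base1 && (diff == 4 || diff == -4);
}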
12760
12761 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12762 for load operations, false for store operations. CONSECUTIVE is true
12763 if the register numbers in the operation must be consecutive in the register
12764 bank. RETURN_PC is true if the value is to be loaded into the PC.
12765 The pattern we are trying to match for load is:
12766 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12767 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12768 :
12769 :
12770 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12771 ]
12772 where
12773 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12774 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12775 3. If consecutive is TRUE, then for kth register being loaded,
12776 REGNO (R_dk) = REGNO (R_d0) + k.
12777 The pattern for store is similar. */
12778 bool
12779 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12780 bool consecutive, bool return_pc)
12781 {
12782 HOST_WIDE_INT count = XVECLEN (op, 0);
12783 rtx reg, mem, addr;
12784 unsigned regno;
12785 unsigned first_regno;
12786 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12787 rtx elt;
12788 bool addr_reg_in_reglist = false;
12789 bool update = false;
12790 int reg_increment;
12791 int offset_adj;
12792 int regs_per_val;
12793
12794 /* If not in SImode, then registers must be consecutive
12795 (e.g., VLDM instructions for DFmode). */
12796 gcc_assert ((mode == SImode) || consecutive);
12797 /* Setting return_pc for stores is illegal. */
12798 gcc_assert (!return_pc || load);
12799
12800 /* Set up the increments and the regs per val based on the mode. */
12801 reg_increment = GET_MODE_SIZE (mode);
12802 regs_per_val = reg_increment / 4;
12803 offset_adj = return_pc ? 1 : 0;
12804
12805 if (count <= 1
12806 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12807 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12808 return false;
12809
12810 /* Check if this is a write-back. */
12811 elt = XVECEXP (op, 0, offset_adj);
12812 if (GET_CODE (SET_SRC (elt)) == PLUS)
12813 {
12814 i++;
12815 base = 1;
12816 update = true;
12817
12818 /* The offset adjustment must be the number of registers being
12819 popped times the size of a single register. */
12820 if (!REG_P (SET_DEST (elt))
12821 || !REG_P (XEXP (SET_SRC (elt), 0))
12822 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12823 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12824 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12825 ((count - 1 - offset_adj) * reg_increment))
12826 return false;
12827 }
12828
12829 i = i + offset_adj;
12830 base = base + offset_adj;
12831 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12832 success depends on the type: VLDM can do just one reg,
12833 LDM must do at least two. */
12834 if ((count <= i) && (mode == SImode))
12835 return false;
12836
12837 elt = XVECEXP (op, 0, i - 1);
12838 if (GET_CODE (elt) != SET)
12839 return false;
12840
12841 if (load)
12842 {
12843 reg = SET_DEST (elt);
12844 mem = SET_SRC (elt);
12845 }
12846 else
12847 {
12848 reg = SET_SRC (elt);
12849 mem = SET_DEST (elt);
12850 }
12851
12852 if (!REG_P (reg) || !MEM_P (mem))
12853 return false;
12854
12855 regno = REGNO (reg);
12856 first_regno = regno;
12857 addr = XEXP (mem, 0);
12858 if (GET_CODE (addr) == PLUS)
12859 {
12860 if (!CONST_INT_P (XEXP (addr, 1)))
12861 return false;
12862
12863 offset = INTVAL (XEXP (addr, 1));
12864 addr = XEXP (addr, 0);
12865 }
12866
12867 if (!REG_P (addr))
12868 return false;
12869
12870 /* Don't allow SP to be loaded unless it is also the base register. It
12871 guarantees that SP is reset correctly when an LDM instruction
12872 is interrupted. Otherwise, we might end up with a corrupt stack. */
12873 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12874 return false;
12875
12876 for (; i < count; i++)
12877 {
12878 elt = XVECEXP (op, 0, i);
12879 if (GET_CODE (elt) != SET)
12880 return false;
12881
12882 if (load)
12883 {
12884 reg = SET_DEST (elt);
12885 mem = SET_SRC (elt);
12886 }
12887 else
12888 {
12889 reg = SET_SRC (elt);
12890 mem = SET_DEST (elt);
12891 }
12892
12893 if (!REG_P (reg)
12894 || GET_MODE (reg) != mode
12895 || REGNO (reg) <= regno
12896 || (consecutive
12897 && (REGNO (reg) !=
12898 (unsigned int) (first_regno + regs_per_val * (i - base))))
12899 /* Don't allow SP to be loaded unless it is also the base register. It
12900 guarantees that SP is reset correctly when an LDM instruction
12901 is interrupted. Otherwise, we might end up with a corrupt stack. */
12902 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12903 || !MEM_P (mem)
12904 || GET_MODE (mem) != mode
12905 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12906 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12907 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12908 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12909 offset + (i - base) * reg_increment))
12910 && (!REG_P (XEXP (mem, 0))
12911 || offset + (i - base) * reg_increment != 0)))
12912 return false;
12913
12914 regno = REGNO (reg);
12915 if (regno == REGNO (addr))
12916 addr_reg_in_reglist = true;
12917 }
12918
12919 if (load)
12920 {
12921 if (update && addr_reg_in_reglist)
12922 return false;
12923
12924 /* For Thumb-1, the address register is always modified, either by
12925 write-back or by an explicit load. If the pattern does not describe an
12926 update, then the address register must be in the list of loaded registers. */
12927 if (TARGET_THUMB1)
12928 return update || addr_reg_in_reglist;
12929 }
12930
12931 return true;
12932 }
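
/* An illustrative sketch (hypothetical helper, unused elsewhere) of the
   per-element offset check in the loop above: element I of the parallel
   must access OFFSET + (I - BASE) * REG_INCREMENT, where REG_INCREMENT is
   4 for SImode LDM/STM and 8 for DFmode VLDM/VSTM.  For an SImode LDM
   whose first data element is at offset 0, the elements are expected at
   0, 4, 8, ...  */

static HOST_WIDE_INT
ldm_stm_expected_offset_example (HOST_WIDE_INT offset, HOST_WIDE_INT i,
				 HOST_WIDE_INT base, int reg_increment)
{
  return offset + (i - base) * reg_increment;
}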
12933
12934 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12935 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12936 instruction. ADD_OFFSET is nonzero if the base address register needs
12937 to be modified with an add instruction before we can use it. */
12938
12939 static bool
12940 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12941 int nops, HOST_WIDE_INT add_offset)
12942 {
12943 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12944 if the offset isn't small enough. The reason 2 ldrs are faster
12945 is because these ARMs are able to do more than one cache access
12946 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12947 whilst the ARM8 has a double bandwidth cache. This means that
12948 these cores can do both an instruction fetch and a data fetch in
12949 a single cycle, so the trick of calculating the address into a
12950 scratch register (one of the result regs) and then doing a load
12951 multiple actually becomes slower (and no smaller in code size).
12952 That is the transformation
12953
12954 ldr rd1, [rbase + offset]
12955 ldr rd2, [rbase + offset + 4]
12956
12957 to
12958
12959 add rd1, rbase, offset
12960 ldmia rd1, {rd1, rd2}
12961
12962 produces worse code -- '3 cycles + any stalls on rd2' instead of
12963 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12964 access per cycle, the first sequence could never complete in less
12965 than 6 cycles, whereas the ldm sequence would only take 5 and
12966 would make better use of sequential accesses if not hitting the
12967 cache.
12968
12969 We cheat here and test 'arm_ld_sched' which we currently know to
12970 only be true for the ARM8, ARM9 and StrongARM. If this ever
12971 changes, then the test below needs to be reworked. */
12972 if (nops == 2 && arm_ld_sched && add_offset != 0)
12973 return false;
12974
12975 /* XScale has load-store double instructions, but they have stricter
12976 alignment requirements than load-store multiple, so we cannot
12977 use them.
12978
12979 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12980 the pipeline until completion.
12981
12982 NREGS CYCLES
12983 1 3
12984 2 4
12985 3 5
12986 4 6
12987
12988 An ldr instruction takes 1-3 cycles, but does not block the
12989 pipeline.
12990
12991 NREGS CYCLES
12992 1 1-3
12993 2 2-6
12994 3 3-9
12995 4 4-12
12996
12997 Best case ldr will always win. However, the more ldr instructions
12998 we issue, the less likely we are to be able to schedule them well.
12999 Using ldr instructions also increases code size.
13000
13001 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13002 for counts of 3 or 4 regs. */
13003 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13004 return false;
13005 return true;
13006 }
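
/* An illustrative sketch of the XScale numbers quoted above (hypothetical
   helpers, not used elsewhere): an ldm of NREGS registers costs 2 + NREGS
   cycles and blocks the pipeline, whereas NREGS separate ldr instructions
   cost one cycle each in the best case.  Separate loads therefore win for
   one or two registers (1 vs 3, 2 vs 4), which is why the code above
   rejects ldm for two registers on XScale unless optimizing for size.  */

static int
xscale_ldm_cycles_example (int nregs)
{
  return 2 + nregs;
}

static int
xscale_ldr_best_case_cycles_example (int nregs)
{
  return nregs;
}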
13007
13008 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13009 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13010 an array ORDER which describes the sequence to use when accessing the
13011 offsets that produces an ascending order. In this sequence, each
13012 offset must be larger by exactly 4 than the previous one. ORDER[0]
13013 must have been filled in with the lowest offset by the caller.
13014 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13015 we use to verify that ORDER produces an ascending order of registers.
13016 Return true if it was possible to construct such an order, false if
13017 not. */
13018
13019 static bool
13020 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13021 int *unsorted_regs)
13022 {
13023 int i;
13024 for (i = 1; i < nops; i++)
13025 {
13026 int j;
13027
13028 order[i] = order[i - 1];
13029 for (j = 0; j < nops; j++)
13030 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13031 {
13032 /* We must find exactly one offset that is higher than the
13033 previous one by 4. */
13034 if (order[i] != order[i - 1])
13035 return false;
13036 order[i] = j;
13037 }
13038 if (order[i] == order[i - 1])
13039 return false;
13040 /* The register numbers must be ascending. */
13041 if (unsorted_regs != NULL
13042 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13043 return false;
13044 }
13045 return true;
13046 }
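
/* An illustrative sketch of the ordering walk above on plain arrays
   (hypothetical helper, not used elsewhere); the register check is
   omitted.  For offsets {8, 0, 4, 12} with ORDER[0] preset to 1 (the
   index of the lowest offset) it fills ORDER with {1, 2, 0, 3}, i.e. the
   accesses at offsets 0, 4, 8 and 12.  */

static bool
offset_order_example (int nops, const HOST_WIDE_INT *offsets, int *order)
{
  int i, j;

  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    /* There must be exactly one such offset.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
    }
  return true;
}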
13047
13048 /* Used to determine in a peephole whether a sequence of load
13049 instructions can be changed into a load-multiple instruction.
13050 NOPS is the number of separate load instructions we are examining. The
13051 first NOPS entries in OPERANDS are the destination registers, the
13052 next NOPS entries are memory operands. If this function is
13053 successful, *BASE is set to the common base register of the memory
13054 accesses; *LOAD_OFFSET is set to the first memory location's offset
13055 from that base register.
13056 REGS is an array filled in with the destination register numbers.
13057 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13058 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13059 the sequence of registers in REGS matches the loads from ascending memory
13060 locations, and the function verifies that the register numbers are
13061 themselves ascending. If CHECK_REGS is false, the register numbers
13062 are stored in the order they are found in the operands. */
13063 static int
13064 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13065 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13066 {
13067 int unsorted_regs[MAX_LDM_STM_OPS];
13068 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13069 int order[MAX_LDM_STM_OPS];
13070 rtx base_reg_rtx = NULL;
13071 int base_reg = -1;
13072 int i, ldm_case;
13073
13074 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13075 easily extended if required. */
13076 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13077
13078 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13079
13080 /* Loop over the operands and check that the memory references are
13081 suitable (i.e. immediate offsets from the same base register). At
13082 the same time, extract the target register, and the memory
13083 offsets. */
13084 for (i = 0; i < nops; i++)
13085 {
13086 rtx reg;
13087 rtx offset;
13088
13089 /* Convert a subreg of a mem into the mem itself. */
13090 if (GET_CODE (operands[nops + i]) == SUBREG)
13091 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13092
13093 gcc_assert (MEM_P (operands[nops + i]));
13094
13095 /* Don't reorder volatile memory references; it doesn't seem worth
13096 looking for the case where the order is ok anyway. */
13097 if (MEM_VOLATILE_P (operands[nops + i]))
13098 return 0;
13099
13100 offset = const0_rtx;
13101
13102 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13103 || (GET_CODE (reg) == SUBREG
13104 && REG_P (reg = SUBREG_REG (reg))))
13105 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13106 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13107 || (GET_CODE (reg) == SUBREG
13108 && REG_P (reg = SUBREG_REG (reg))))
13109 && (CONST_INT_P (offset
13110 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13111 {
13112 if (i == 0)
13113 {
13114 base_reg = REGNO (reg);
13115 base_reg_rtx = reg;
13116 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13117 return 0;
13118 }
13119 else if (base_reg != (int) REGNO (reg))
13120 /* Not addressed from the same base register. */
13121 return 0;
13122
13123 unsorted_regs[i] = (REG_P (operands[i])
13124 ? REGNO (operands[i])
13125 : REGNO (SUBREG_REG (operands[i])));
13126
13127 /* If it isn't an integer register, or if it overwrites the
13128 base register but isn't the last insn in the list, then
13129 we can't do this. */
13130 if (unsorted_regs[i] < 0
13131 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13132 || unsorted_regs[i] > 14
13133 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13134 return 0;
13135
13136 /* Don't allow SP to be loaded unless it is also the base
13137 register. It guarantees that SP is reset correctly when
13138 an LDM instruction is interrupted. Otherwise, we might
13139 end up with a corrupt stack. */
13140 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13141 return 0;
13142
13143 unsorted_offsets[i] = INTVAL (offset);
13144 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13145 order[0] = i;
13146 }
13147 else
13148 /* Not a suitable memory address. */
13149 return 0;
13150 }
13151
13152 /* All the useful information has now been extracted from the
13153 operands into unsorted_regs and unsorted_offsets; additionally,
13154 order[0] has been set to the lowest offset in the list. Sort
13155 the offsets into order, verifying that they are adjacent, and
13156 check that the register numbers are ascending. */
13157 if (!compute_offset_order (nops, unsorted_offsets, order,
13158 check_regs ? unsorted_regs : NULL))
13159 return 0;
13160
13161 if (saved_order)
13162 memcpy (saved_order, order, sizeof order);
13163
13164 if (base)
13165 {
13166 *base = base_reg;
13167
13168 for (i = 0; i < nops; i++)
13169 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13170
13171 *load_offset = unsorted_offsets[order[0]];
13172 }
13173
13174 if (TARGET_THUMB1
13175 && !peep2_reg_dead_p (nops, base_reg_rtx))
13176 return 0;
13177
13178 if (unsorted_offsets[order[0]] == 0)
13179 ldm_case = 1; /* ldmia */
13180 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13181 ldm_case = 2; /* ldmib */
13182 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13183 ldm_case = 3; /* ldmda */
13184 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13185 ldm_case = 4; /* ldmdb */
13186 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13187 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13188 ldm_case = 5;
13189 else
13190 return 0;
13191
13192 if (!multiple_operation_profitable_p (false, nops,
13193 ldm_case == 5
13194 ? unsorted_offsets[order[0]] : 0))
13195 return 0;
13196
13197 return ldm_case;
13198 }
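
/* An illustrative sketch of the addressing-mode selection above
   (hypothetical helper, not used elsewhere): given the lowest and highest
   sorted offsets, return 1 for ldmia, 2 for ldmib, 3 for ldmda, 4 for
   ldmdb, or 5 when the base must first be adjusted with an add.  The real
   code additionally restricts ldmib/ldmda to ARM state and requires the
   case-5 offset to be an encodable immediate.  */

static int
ldm_case_example (HOST_WIDE_INT lowest, HOST_WIDE_INT highest)
{
  if (lowest == 0)
    return 1;			/* ldmia  */
  if (lowest == 4)
    return 2;			/* ldmib  */
  if (highest == 0)
    return 3;			/* ldmda  */
  if (highest == -4)
    return 4;			/* ldmdb  */
  return 5;			/* add base, then ldmia  */
}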
13199
13200 /* Used to determine in a peephole whether a sequence of store instructions can
13201 be changed into a store-multiple instruction.
13202 NOPS is the number of separate store instructions we are examining.
13203 NOPS_TOTAL is the total number of instructions recognized by the peephole
13204 pattern.
13205 The first NOPS entries in OPERANDS are the source registers, the next
13206 NOPS entries are memory operands. If this function is successful, *BASE is
13207 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13208 to the first memory location's offset from that base register. REGS is an
13209 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13210 likewise filled with the corresponding rtx's.
13211 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13212 numbers to an ascending order of stores.
13213 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13214 from ascending memory locations, and the function verifies that the register
13215 numbers are themselves ascending. If CHECK_REGS is false, the register
13216 numbers are stored in the order they are found in the operands. */
13217 static int
13218 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13219 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13220 HOST_WIDE_INT *load_offset, bool check_regs)
13221 {
13222 int unsorted_regs[MAX_LDM_STM_OPS];
13223 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13224 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13225 int order[MAX_LDM_STM_OPS];
13226 int base_reg = -1;
13227 rtx base_reg_rtx = NULL;
13228 int i, stm_case;
13229
13230 /* Write-back of the base register is currently only supported for Thumb-1. */
13231 int base_writeback = TARGET_THUMB1;
13232
13233 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13234 easily extended if required. */
13235 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13236
13237 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13238
13239 /* Loop over the operands and check that the memory references are
13240 suitable (i.e. immediate offsets from the same base register). At
13241 the same time, extract the target register, and the memory
13242 offsets. */
13243 for (i = 0; i < nops; i++)
13244 {
13245 rtx reg;
13246 rtx offset;
13247
13248 /* Convert a subreg of a mem into the mem itself. */
13249 if (GET_CODE (operands[nops + i]) == SUBREG)
13250 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13251
13252 gcc_assert (MEM_P (operands[nops + i]));
13253
13254 /* Don't reorder volatile memory references; it doesn't seem worth
13255 looking for the case where the order is ok anyway. */
13256 if (MEM_VOLATILE_P (operands[nops + i]))
13257 return 0;
13258
13259 offset = const0_rtx;
13260
13261 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13262 || (GET_CODE (reg) == SUBREG
13263 && REG_P (reg = SUBREG_REG (reg))))
13264 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13265 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13266 || (GET_CODE (reg) == SUBREG
13267 && REG_P (reg = SUBREG_REG (reg))))
13268 && (CONST_INT_P (offset
13269 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13270 {
13271 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13272 ? operands[i] : SUBREG_REG (operands[i]));
13273 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13274
13275 if (i == 0)
13276 {
13277 base_reg = REGNO (reg);
13278 base_reg_rtx = reg;
13279 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13280 return 0;
13281 }
13282 else if (base_reg != (int) REGNO (reg))
13283 /* Not addressed from the same base register. */
13284 return 0;
13285
13286 /* If it isn't an integer register, then we can't do this. */
13287 if (unsorted_regs[i] < 0
13288 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13289 /* The effects are unpredictable if the base register is
13290 both updated and stored. */
13291 || (base_writeback && unsorted_regs[i] == base_reg)
13292 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13293 || unsorted_regs[i] > 14)
13294 return 0;
13295
13296 unsorted_offsets[i] = INTVAL (offset);
13297 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13298 order[0] = i;
13299 }
13300 else
13301 /* Not a suitable memory address. */
13302 return 0;
13303 }
13304
13305 /* All the useful information has now been extracted from the
13306 operands into unsorted_regs and unsorted_offsets; additionally,
13307 order[0] has been set to the lowest offset in the list. Sort
13308 the offsets into order, verifying that they are adjacent, and
13309 check that the register numbers are ascending. */
13310 if (!compute_offset_order (nops, unsorted_offsets, order,
13311 check_regs ? unsorted_regs : NULL))
13312 return 0;
13313
13314 if (saved_order)
13315 memcpy (saved_order, order, sizeof order);
13316
13317 if (base)
13318 {
13319 *base = base_reg;
13320
13321 for (i = 0; i < nops; i++)
13322 {
13323 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13324 if (reg_rtxs)
13325 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13326 }
13327
13328 *load_offset = unsorted_offsets[order[0]];
13329 }
13330
13331 if (TARGET_THUMB1
13332 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13333 return 0;
13334
13335 if (unsorted_offsets[order[0]] == 0)
13336 stm_case = 1; /* stmia */
13337 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13338 stm_case = 2; /* stmib */
13339 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13340 stm_case = 3; /* stmda */
13341 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13342 stm_case = 4; /* stmdb */
13343 else
13344 return 0;
13345
13346 if (!multiple_operation_profitable_p (false, nops, 0))
13347 return 0;
13348
13349 return stm_case;
13350 }
13351 \f
13352 /* Routines for use in generating RTL. */
13353
13354 /* Generate a load-multiple instruction. COUNT is the number of loads in
13355 the instruction; REGS and MEMS are arrays containing the operands.
13356 BASEREG is the base register to be used in addressing the memory operands.
13357 WBACK_OFFSET is nonzero if the instruction should update the base
13358 register. */
13359
13360 static rtx
13361 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13362 HOST_WIDE_INT wback_offset)
13363 {
13364 int i = 0, j;
13365 rtx result;
13366
13367 if (!multiple_operation_profitable_p (false, count, 0))
13368 {
13369 rtx seq;
13370
13371 start_sequence ();
13372
13373 for (i = 0; i < count; i++)
13374 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13375
13376 if (wback_offset != 0)
13377 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13378
13379 seq = get_insns ();
13380 end_sequence ();
13381
13382 return seq;
13383 }
13384
13385 result = gen_rtx_PARALLEL (VOIDmode,
13386 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13387 if (wback_offset != 0)
13388 {
13389 XVECEXP (result, 0, 0)
13390 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13391 i = 1;
13392 count++;
13393 }
13394
13395 for (j = 0; i < count; i++, j++)
13396 XVECEXP (result, 0, i)
13397 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13398
13399 return result;
13400 }
13401
13402 /* Generate a store-multiple instruction. COUNT is the number of stores in
13403 the instruction; REGS and MEMS are arrays containing the operands.
13404 BASEREG is the base register to be used in addressing the memory operands.
13405 WBACK_OFFSET is nonzero if the instruction should update the base
13406 register. */
13407
13408 static rtx
13409 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13410 HOST_WIDE_INT wback_offset)
13411 {
13412 int i = 0, j;
13413 rtx result;
13414
13415 if (GET_CODE (basereg) == PLUS)
13416 basereg = XEXP (basereg, 0);
13417
13418 if (!multiple_operation_profitable_p (false, count, 0))
13419 {
13420 rtx seq;
13421
13422 start_sequence ();
13423
13424 for (i = 0; i < count; i++)
13425 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13426
13427 if (wback_offset != 0)
13428 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13429
13430 seq = get_insns ();
13431 end_sequence ();
13432
13433 return seq;
13434 }
13435
13436 result = gen_rtx_PARALLEL (VOIDmode,
13437 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13438 if (wback_offset != 0)
13439 {
13440 XVECEXP (result, 0, 0)
13441 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13442 i = 1;
13443 count++;
13444 }
13445
13446 for (j = 0; i < count; i++, j++)
13447 XVECEXP (result, 0, i)
13448 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13449
13450 return result;
13451 }
13452
13453 /* Generate either a load-multiple or a store-multiple instruction. This
13454 function can be used in situations where we can start with a single MEM
13455 rtx and adjust its address upwards.
13456 COUNT is the number of operations in the instruction, not counting a
13457 possible update of the base register. REGS is an array containing the
13458 register operands.
13459 BASEREG is the base register to be used in addressing the memory operands,
13460 which are constructed from BASEMEM.
13461 WRITE_BACK specifies whether the generated instruction should include an
13462 update of the base register.
13463 OFFSETP is used to pass an offset to and from this function; this offset
13464 is not used when constructing the address (instead BASEMEM should have an
13465 appropriate offset in its address), it is used only for setting
13466 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13467
13468 static rtx
13469 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13470 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13471 {
13472 rtx mems[MAX_LDM_STM_OPS];
13473 HOST_WIDE_INT offset = *offsetp;
13474 int i;
13475
13476 gcc_assert (count <= MAX_LDM_STM_OPS);
13477
13478 if (GET_CODE (basereg) == PLUS)
13479 basereg = XEXP (basereg, 0);
13480
13481 for (i = 0; i < count; i++)
13482 {
13483 rtx addr = plus_constant (Pmode, basereg, i * 4);
13484 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13485 offset += 4;
13486 }
13487
13488 if (write_back)
13489 *offsetp = offset;
13490
13491 if (is_load)
13492 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13493 write_back ? 4 * count : 0);
13494 else
13495 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13496 write_back ? 4 * count : 0);
13497 }
13498
13499 rtx
13500 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13501 rtx basemem, HOST_WIDE_INT *offsetp)
13502 {
13503 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13504 offsetp);
13505 }
13506
13507 rtx
13508 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13509 rtx basemem, HOST_WIDE_INT *offsetp)
13510 {
13511 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13512 offsetp);
13513 }
13514
13515 /* Called from a peephole2 expander to turn a sequence of loads into an
13516 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13517 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13518 is true if we can reorder the registers because their subsequent uses
13519 are commutative.
13520 Returns true iff we could generate a new instruction. */
13521
13522 bool
13523 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13524 {
13525 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13526 rtx mems[MAX_LDM_STM_OPS];
13527 int i, j, base_reg;
13528 rtx base_reg_rtx;
13529 HOST_WIDE_INT offset;
13530 int write_back = FALSE;
13531 int ldm_case;
13532 rtx addr;
13533
13534 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13535 &base_reg, &offset, !sort_regs);
13536
13537 if (ldm_case == 0)
13538 return false;
13539
13540 if (sort_regs)
13541 for (i = 0; i < nops - 1; i++)
13542 for (j = i + 1; j < nops; j++)
13543 if (regs[i] > regs[j])
13544 {
13545 int t = regs[i];
13546 regs[i] = regs[j];
13547 regs[j] = t;
13548 }
13549 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13550
13551 if (TARGET_THUMB1)
13552 {
13553 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13554 gcc_assert (ldm_case == 1 || ldm_case == 5);
13555 write_back = TRUE;
13556 }
13557
13558 if (ldm_case == 5)
13559 {
13560 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13561 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13562 offset = 0;
13563 if (!TARGET_THUMB1)
13564 {
13565 base_reg = regs[0];
13566 base_reg_rtx = newbase;
13567 }
13568 }
13569
13570 for (i = 0; i < nops; i++)
13571 {
13572 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13573 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13574 SImode, addr, 0);
13575 }
13576 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13577 write_back ? offset + i * 4 : 0));
13578 return true;
13579 }
13580
13581 /* Called from a peephole2 expander to turn a sequence of stores into an
13582 STM instruction. OPERANDS are the operands found by the peephole matcher;
13583 NOPS indicates how many separate stores we are trying to combine.
13584 Returns true iff we could generate a new instruction. */
13585
13586 bool
13587 gen_stm_seq (rtx *operands, int nops)
13588 {
13589 int i;
13590 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13591 rtx mems[MAX_LDM_STM_OPS];
13592 int base_reg;
13593 rtx base_reg_rtx;
13594 HOST_WIDE_INT offset;
13595 int write_back = FALSE;
13596 int stm_case;
13597 rtx addr;
13598 bool base_reg_dies;
13599
13600 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13601 mem_order, &base_reg, &offset, true);
13602
13603 if (stm_case == 0)
13604 return false;
13605
13606 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13607
13608 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13609 if (TARGET_THUMB1)
13610 {
13611 gcc_assert (base_reg_dies);
13612 write_back = TRUE;
13613 }
13614
13615 if (stm_case == 5)
13616 {
13617 gcc_assert (base_reg_dies);
13618 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13619 offset = 0;
13620 }
13621
13622 addr = plus_constant (Pmode, base_reg_rtx, offset);
13623
13624 for (i = 0; i < nops; i++)
13625 {
13626 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13627 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13628 SImode, addr, 0);
13629 }
13630 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13631 write_back ? offset + i * 4 : 0));
13632 return true;
13633 }
13634
13635 /* Called from a peephole2 expander to turn a sequence of stores that are
13636 preceded by constant loads into an STM instruction. OPERANDS are the
13637 operands found by the peephole matcher; NOPS indicates how many
13638 separate stores we are trying to combine; there are 2 * NOPS
13639 instructions in the peephole.
13640 Returns true iff we could generate a new instruction. */
13641
13642 bool
13643 gen_const_stm_seq (rtx *operands, int nops)
13644 {
13645 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13646 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13647 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13648 rtx mems[MAX_LDM_STM_OPS];
13649 int base_reg;
13650 rtx base_reg_rtx;
13651 HOST_WIDE_INT offset;
13652 int write_back = FALSE;
13653 int stm_case;
13654 rtx addr;
13655 bool base_reg_dies;
13656 int i, j;
13657 HARD_REG_SET allocated;
13658
13659 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13660 mem_order, &base_reg, &offset, false);
13661
13662 if (stm_case == 0)
13663 return false;
13664
13665 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13666
13667 /* If the same register is used more than once, try to find a free
13668 register. */
13669 CLEAR_HARD_REG_SET (allocated);
13670 for (i = 0; i < nops; i++)
13671 {
13672 for (j = i + 1; j < nops; j++)
13673 if (regs[i] == regs[j])
13674 {
13675 rtx t = peep2_find_free_register (0, nops * 2,
13676 TARGET_THUMB1 ? "l" : "r",
13677 SImode, &allocated);
13678 if (t == NULL_RTX)
13679 return false;
13680 reg_rtxs[i] = t;
13681 regs[i] = REGNO (t);
13682 }
13683 }
13684
13685 /* Compute an ordering that maps the register numbers to an ascending
13686 sequence. */
13687 reg_order[0] = 0;
13688 for (i = 0; i < nops; i++)
13689 if (regs[i] < regs[reg_order[0]])
13690 reg_order[0] = i;
13691
13692 for (i = 1; i < nops; i++)
13693 {
13694 int this_order = reg_order[i - 1];
13695 for (j = 0; j < nops; j++)
13696 if (regs[j] > regs[reg_order[i - 1]]
13697 && (this_order == reg_order[i - 1]
13698 || regs[j] < regs[this_order]))
13699 this_order = j;
13700 reg_order[i] = this_order;
13701 }
13702
13703 /* Ensure that registers that must be live after the instruction end
13704 up with the correct value. */
13705 for (i = 0; i < nops; i++)
13706 {
13707 int this_order = reg_order[i];
13708 if ((this_order != mem_order[i]
13709 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13710 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13711 return false;
13712 }
13713
13714 /* Load the constants. */
13715 for (i = 0; i < nops; i++)
13716 {
13717 rtx op = operands[2 * nops + mem_order[i]];
13718 sorted_regs[i] = regs[reg_order[i]];
13719 emit_move_insn (reg_rtxs[reg_order[i]], op);
13720 }
13721
13722 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13723
13724 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13725 if (TARGET_THUMB1)
13726 {
13727 gcc_assert (base_reg_dies);
13728 write_back = TRUE;
13729 }
13730
13731 if (stm_case == 5)
13732 {
13733 gcc_assert (base_reg_dies);
13734 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13735 offset = 0;
13736 }
13737
13738 addr = plus_constant (Pmode, base_reg_rtx, offset);
13739
13740 for (i = 0; i < nops; i++)
13741 {
13742 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13743 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13744 SImode, addr, 0);
13745 }
13746 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13747 write_back ? offset + i * 4 : 0));
13748 return true;
13749 }
13750
13751 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13752 unaligned copies on processors which support unaligned semantics for those
13753 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13754 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13755 An interleave factor of 1 (the minimum) will perform no interleaving.
13756 Load/store multiple are used for aligned addresses where possible. */
13757
13758 static void
13759 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13760 HOST_WIDE_INT length,
13761 unsigned int interleave_factor)
13762 {
13763 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13764 int *regnos = XALLOCAVEC (int, interleave_factor);
13765 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13766 HOST_WIDE_INT i, j;
13767 HOST_WIDE_INT remaining = length, words;
13768 rtx halfword_tmp = NULL, byte_tmp = NULL;
13769 rtx dst, src;
13770 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13771 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13772 HOST_WIDE_INT srcoffset, dstoffset;
13773 HOST_WIDE_INT src_autoinc, dst_autoinc;
13774 rtx mem, addr;
13775
13776 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13777
13778 /* Use hard registers if we have aligned source or destination so we can use
13779 load/store multiple with contiguous registers. */
13780 if (dst_aligned || src_aligned)
13781 for (i = 0; i < interleave_factor; i++)
13782 regs[i] = gen_rtx_REG (SImode, i);
13783 else
13784 for (i = 0; i < interleave_factor; i++)
13785 regs[i] = gen_reg_rtx (SImode);
13786
13787 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13788 src = copy_addr_to_reg (XEXP (srcbase, 0));
13789
13790 srcoffset = dstoffset = 0;
13791
13792 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13793 For copying the last bytes we want to subtract this offset again. */
13794 src_autoinc = dst_autoinc = 0;
13795
13796 for (i = 0; i < interleave_factor; i++)
13797 regnos[i] = i;
13798
13799 /* Copy BLOCK_SIZE_BYTES chunks. */
13800
13801 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13802 {
13803 /* Load words. */
13804 if (src_aligned && interleave_factor > 1)
13805 {
13806 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13807 TRUE, srcbase, &srcoffset));
13808 src_autoinc += UNITS_PER_WORD * interleave_factor;
13809 }
13810 else
13811 {
13812 for (j = 0; j < interleave_factor; j++)
13813 {
13814 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13815 - src_autoinc));
13816 mem = adjust_automodify_address (srcbase, SImode, addr,
13817 srcoffset + j * UNITS_PER_WORD);
13818 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13819 }
13820 srcoffset += block_size_bytes;
13821 }
13822
13823 /* Store words. */
13824 if (dst_aligned && interleave_factor > 1)
13825 {
13826 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13827 TRUE, dstbase, &dstoffset));
13828 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13829 }
13830 else
13831 {
13832 for (j = 0; j < interleave_factor; j++)
13833 {
13834 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13835 - dst_autoinc));
13836 mem = adjust_automodify_address (dstbase, SImode, addr,
13837 dstoffset + j * UNITS_PER_WORD);
13838 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13839 }
13840 dstoffset += block_size_bytes;
13841 }
13842
13843 remaining -= block_size_bytes;
13844 }
13845
13846 /* Copy any whole words left (note these aren't interleaved with any
13847 subsequent halfword/byte load/stores in the interests of simplicity). */
13848
13849 words = remaining / UNITS_PER_WORD;
13850
13851 gcc_assert (words < interleave_factor);
13852
13853 if (src_aligned && words > 1)
13854 {
13855 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13856 &srcoffset));
13857 src_autoinc += UNITS_PER_WORD * words;
13858 }
13859 else
13860 {
13861 for (j = 0; j < words; j++)
13862 {
13863 addr = plus_constant (Pmode, src,
13864 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13865 mem = adjust_automodify_address (srcbase, SImode, addr,
13866 srcoffset + j * UNITS_PER_WORD);
13867 if (src_aligned)
13868 emit_move_insn (regs[j], mem);
13869 else
13870 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13871 }
13872 srcoffset += words * UNITS_PER_WORD;
13873 }
13874
13875 if (dst_aligned && words > 1)
13876 {
13877 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13878 &dstoffset));
13879 dst_autoinc += words * UNITS_PER_WORD;
13880 }
13881 else
13882 {
13883 for (j = 0; j < words; j++)
13884 {
13885 addr = plus_constant (Pmode, dst,
13886 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13887 mem = adjust_automodify_address (dstbase, SImode, addr,
13888 dstoffset + j * UNITS_PER_WORD);
13889 if (dst_aligned)
13890 emit_move_insn (mem, regs[j]);
13891 else
13892 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13893 }
13894 dstoffset += words * UNITS_PER_WORD;
13895 }
13896
13897 remaining -= words * UNITS_PER_WORD;
13898
13899 gcc_assert (remaining < 4);
13900
13901 /* Copy a halfword if necessary. */
13902
13903 if (remaining >= 2)
13904 {
13905 halfword_tmp = gen_reg_rtx (SImode);
13906
13907 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13908 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13909 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13910
13911 /* Either write out immediately, or delay until we've loaded the last
13912 byte, depending on interleave factor. */
13913 if (interleave_factor == 1)
13914 {
13915 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13916 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13917 emit_insn (gen_unaligned_storehi (mem,
13918 gen_lowpart (HImode, halfword_tmp)));
13919 halfword_tmp = NULL;
13920 dstoffset += 2;
13921 }
13922
13923 remaining -= 2;
13924 srcoffset += 2;
13925 }
13926
13927 gcc_assert (remaining < 2);
13928
13929 /* Copy last byte. */
13930
13931 if ((remaining & 1) != 0)
13932 {
13933 byte_tmp = gen_reg_rtx (SImode);
13934
13935 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13936 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13937 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13938
13939 if (interleave_factor == 1)
13940 {
13941 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13942 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13943 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13944 byte_tmp = NULL;
13945 dstoffset++;
13946 }
13947
13948 remaining--;
13949 srcoffset++;
13950 }
13951
13952 /* Store last halfword if we haven't done so already. */
13953
13954 if (halfword_tmp)
13955 {
13956 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13957 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13958 emit_insn (gen_unaligned_storehi (mem,
13959 gen_lowpart (HImode, halfword_tmp)));
13960 dstoffset += 2;
13961 }
13962
13963 /* Likewise for last byte. */
13964
13965 if (byte_tmp)
13966 {
13967 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13968 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13969 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13970 dstoffset++;
13971 }
13972
13973 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13974 }
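
/* An illustrative sketch of how the routine above carves up a copy
   (hypothetical helper and struct, not used elsewhere), assuming
   UNITS_PER_WORD == 4: LENGTH bytes are split into blocks of
   INTERLEAVE_FACTOR words, then leftover whole words, then an optional
   halfword and an optional final byte.  For length 23 with an interleave
   factor of 2 (8-byte blocks) this gives 2 blocks, 1 extra word, a
   halfword and a byte.  */

struct block_copy_plan_example
{
  HOST_WIDE_INT blocks;		/* Groups of INTERLEAVE_FACTOR words.  */
  HOST_WIDE_INT extra_words;	/* Whole words left over.  */
  bool halfword;		/* A trailing 2-byte copy is needed.  */
  bool byte;			/* A trailing 1-byte copy is needed.  */
};

static struct block_copy_plan_example
block_copy_plan_example_fn (HOST_WIDE_INT length, int interleave_factor)
{
  struct block_copy_plan_example plan;
  HOST_WIDE_INT block_size = interleave_factor * 4;
  HOST_WIDE_INT remaining = length;

  plan.blocks = remaining / block_size;
  remaining -= plan.blocks * block_size;
  plan.extra_words = remaining / 4;
  remaining -= plan.extra_words * 4;
  plan.halfword = remaining >= 2;
  remaining -= plan.halfword ? 2 : 0;
  plan.byte = remaining != 0;
  return plan;
}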
13975
13976 /* From mips_adjust_block_mem:
13977
13978 Helper function for doing a loop-based block operation on memory
13979 reference MEM. Each iteration of the loop will operate on LENGTH
13980 bytes of MEM.
13981
13982 Create a new base register for use within the loop and point it to
13983 the start of MEM. Create a new memory reference that uses this
13984 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13985
13986 static void
13987 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13988 rtx *loop_mem)
13989 {
13990 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13991
13992 /* Although the new mem does not refer to a known location,
13993 it does keep up to LENGTH bytes of alignment. */
13994 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13995 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13996 }
13997
13998 /* From mips_block_move_loop:
13999
14000 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14001 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14002 the memory regions do not overlap. */
14003
14004 static void
14005 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14006 unsigned int interleave_factor,
14007 HOST_WIDE_INT bytes_per_iter)
14008 {
14009 rtx src_reg, dest_reg, final_src, test;
14010 HOST_WIDE_INT leftover;
14011
14012 leftover = length % bytes_per_iter;
14013 length -= leftover;
14014
14015 /* Create registers and memory references for use within the loop. */
14016 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14017 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14018
14019 /* Calculate the value that SRC_REG should have after the last iteration of
14020 the loop. */
14021 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14022 0, 0, OPTAB_WIDEN);
14023
14024 /* Emit the start of the loop. */
14025 rtx_code_label *label = gen_label_rtx ();
14026 emit_label (label);
14027
14028 /* Emit the loop body. */
14029 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14030 interleave_factor);
14031
14032 /* Move on to the next block. */
14033 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14034 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14035
14036 /* Emit the loop condition. */
14037 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14038 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14039
14040 /* Mop up any left-over bytes. */
14041 if (leftover)
14042 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14043 }
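
/* An illustrative sketch of the loop/tail split used above (hypothetical
   helper, not used elsewhere).  For a length of 100 bytes and 16 bytes
   per iteration the loop body runs 6 times (96 bytes) and the remaining
   4 bytes are handled by the straight-line copy after the loop.  */

static void
block_move_loop_split_example (HOST_WIDE_INT length,
			       HOST_WIDE_INT bytes_per_iter,
			       HOST_WIDE_INT *iterations,
			       HOST_WIDE_INT *leftover)
{
  *leftover = length % bytes_per_iter;
  *iterations = (length - *leftover) / bytes_per_iter;
}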
14044
14045 /* Emit a block move when either the source or destination is unaligned (not
14046 aligned to a four-byte boundary). This may need further tuning depending on
14047 core type, optimize_size setting, etc. */
14048
14049 static int
14050 arm_movmemqi_unaligned (rtx *operands)
14051 {
14052 HOST_WIDE_INT length = INTVAL (operands[2]);
14053
14054 if (optimize_size)
14055 {
14056 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14057 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14058 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14059 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14060 or dst_aligned though: allow more interleaving in those cases since the
14061 resulting code can be smaller. */
14062 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14063 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14064
14065 if (length > 12)
14066 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14067 interleave_factor, bytes_per_iter);
14068 else
14069 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14070 interleave_factor);
14071 }
14072 else
14073 {
14074 /* Note that the loop created by arm_block_move_unaligned_loop may be
14075 subject to loop unrolling, which makes tuning this condition a little
14076 redundant. */
14077 if (length > 32)
14078 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14079 else
14080 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14081 }
14082
14083 return 1;
14084 }
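
/* An illustrative sketch of the tuning decisions above (hypothetical
   helper, not used elsewhere): when optimizing for size we use an
   interleave factor of 2 (8 bytes per iteration) if either buffer is
   word-aligned, otherwise 1 (4 bytes), and only loop for copies longer
   than 12 bytes; when optimizing for speed we use a factor of 4
   (16 bytes per iteration) and loop for copies longer than 32 bytes.  */

static void
unaligned_movmem_tuning_example (HOST_WIDE_INT length, bool size_p,
				 bool any_aligned, int *factor,
				 HOST_WIDE_INT *bytes_per_iter,
				 bool *use_loop)
{
  if (size_p)
    {
      *factor = any_aligned ? 2 : 1;
      *bytes_per_iter = any_aligned ? 8 : 4;
      *use_loop = length > 12;
    }
  else
    {
      *factor = 4;
      *bytes_per_iter = 16;
      *use_loop = length > 32;
    }
}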
14085
14086 int
14087 arm_gen_movmemqi (rtx *operands)
14088 {
14089 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14090 HOST_WIDE_INT srcoffset, dstoffset;
14091 int i;
14092 rtx src, dst, srcbase, dstbase;
14093 rtx part_bytes_reg = NULL;
14094 rtx mem;
14095
14096 if (!CONST_INT_P (operands[2])
14097 || !CONST_INT_P (operands[3])
14098 || INTVAL (operands[2]) > 64)
14099 return 0;
14100
14101 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14102 return arm_movmemqi_unaligned (operands);
14103
14104 if (INTVAL (operands[3]) & 3)
14105 return 0;
14106
14107 dstbase = operands[0];
14108 srcbase = operands[1];
14109
14110 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14111 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14112
14113 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14114 out_words_to_go = INTVAL (operands[2]) / 4;
14115 last_bytes = INTVAL (operands[2]) & 3;
14116 dstoffset = srcoffset = 0;
14117
14118 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14119 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14120
14121 for (i = 0; in_words_to_go >= 2; i+=4)
14122 {
14123 if (in_words_to_go > 4)
14124 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14125 TRUE, srcbase, &srcoffset));
14126 else
14127 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14128 src, FALSE, srcbase,
14129 &srcoffset));
14130
14131 if (out_words_to_go)
14132 {
14133 if (out_words_to_go > 4)
14134 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14135 TRUE, dstbase, &dstoffset));
14136 else if (out_words_to_go != 1)
14137 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14138 out_words_to_go, dst,
14139 (last_bytes == 0
14140 ? FALSE : TRUE),
14141 dstbase, &dstoffset));
14142 else
14143 {
14144 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14145 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14146 if (last_bytes != 0)
14147 {
14148 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14149 dstoffset += 4;
14150 }
14151 }
14152 }
14153
14154 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14155 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14156 }
14157
14158 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14159 if (out_words_to_go)
14160 {
14161 rtx sreg;
14162
14163 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14164 sreg = copy_to_reg (mem);
14165
14166 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14167 emit_move_insn (mem, sreg);
14168 in_words_to_go--;
14169
14170 gcc_assert (!in_words_to_go); /* Sanity check */
14171 }
14172
14173 if (in_words_to_go)
14174 {
14175 gcc_assert (in_words_to_go > 0);
14176
14177 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14178 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14179 }
14180
14181 gcc_assert (!last_bytes || part_bytes_reg);
14182
14183 if (BYTES_BIG_ENDIAN && last_bytes)
14184 {
14185 rtx tmp = gen_reg_rtx (SImode);
14186
14187 /* The bytes we want are in the top end of the word. */
14188 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14189 GEN_INT (8 * (4 - last_bytes))));
14190 part_bytes_reg = tmp;
14191
14192 while (last_bytes)
14193 {
14194 mem = adjust_automodify_address (dstbase, QImode,
14195 plus_constant (Pmode, dst,
14196 last_bytes - 1),
14197 dstoffset + last_bytes - 1);
14198 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14199
14200 if (--last_bytes)
14201 {
14202 tmp = gen_reg_rtx (SImode);
14203 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14204 part_bytes_reg = tmp;
14205 }
14206 }
14207
14208 }
14209 else
14210 {
14211 if (last_bytes > 1)
14212 {
14213 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14214 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14215 last_bytes -= 2;
14216 if (last_bytes)
14217 {
14218 rtx tmp = gen_reg_rtx (SImode);
14219 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14220 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14221 part_bytes_reg = tmp;
14222 dstoffset += 2;
14223 }
14224 }
14225
14226 if (last_bytes)
14227 {
14228 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14229 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14230 }
14231 }
14232
14233 return 1;
14234 }
14235
14236 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14237 by mode size. */
14238 inline static rtx
14239 next_consecutive_mem (rtx mem)
14240 {
14241 machine_mode mode = GET_MODE (mem);
14242 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14243 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14244
14245 return adjust_automodify_address (mem, mode, addr, offset);
14246 }
14247
14248 /* Copy using LDRD/STRD instructions whenever possible.
14249 Returns true upon success. */
14250 bool
14251 gen_movmem_ldrd_strd (rtx *operands)
14252 {
14253 unsigned HOST_WIDE_INT len;
14254 HOST_WIDE_INT align;
14255 rtx src, dst, base;
14256 rtx reg0;
14257 bool src_aligned, dst_aligned;
14258 bool src_volatile, dst_volatile;
14259
14260 gcc_assert (CONST_INT_P (operands[2]));
14261 gcc_assert (CONST_INT_P (operands[3]));
14262
14263 len = UINTVAL (operands[2]);
14264 if (len > 64)
14265 return false;
14266
14267 /* Maximum alignment we can assume for both src and dst buffers. */
14268 align = INTVAL (operands[3]);
14269
14270 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14271 return false;
14272
14273 /* Place src and dst addresses in registers
14274 and update the corresponding mem rtx. */
14275 dst = operands[0];
14276 dst_volatile = MEM_VOLATILE_P (dst);
14277 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14278 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14279 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14280
14281 src = operands[1];
14282 src_volatile = MEM_VOLATILE_P (src);
14283 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14284 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14285 src = adjust_automodify_address (src, VOIDmode, base, 0);
14286
14287 if (!unaligned_access && !(src_aligned && dst_aligned))
14288 return false;
14289
14290 if (src_volatile || dst_volatile)
14291 return false;
14292
14293 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14294 if (!(dst_aligned || src_aligned))
14295 return arm_gen_movmemqi (operands);
14296
14297 /* If either src or dst is unaligned we'll be accessing it as pairs
14298 of unaligned SImode accesses. Otherwise we can generate DImode
14299 ldrd/strd instructions. */
14300 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14301 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14302
14303 while (len >= 8)
14304 {
14305 len -= 8;
14306 reg0 = gen_reg_rtx (DImode);
14307 rtx low_reg = NULL_RTX;
14308 rtx hi_reg = NULL_RTX;
14309
14310 if (!src_aligned || !dst_aligned)
14311 {
14312 low_reg = gen_lowpart (SImode, reg0);
14313 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14314 }
14315 if (src_aligned)
14316 emit_move_insn (reg0, src);
14317 else
14318 {
14319 emit_insn (gen_unaligned_loadsi (low_reg, src));
14320 src = next_consecutive_mem (src);
14321 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14322 }
14323
14324 if (dst_aligned)
14325 emit_move_insn (dst, reg0);
14326 else
14327 {
14328 emit_insn (gen_unaligned_storesi (dst, low_reg));
14329 dst = next_consecutive_mem (dst);
14330 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14331 }
14332
14333 src = next_consecutive_mem (src);
14334 dst = next_consecutive_mem (dst);
14335 }
14336
14337 gcc_assert (len < 8);
14338 if (len >= 4)
14339 {
14340 /* At least a word but less than a double-word to copy. Copy one word. */
14341 reg0 = gen_reg_rtx (SImode);
14342 src = adjust_address (src, SImode, 0);
14343 dst = adjust_address (dst, SImode, 0);
14344 if (src_aligned)
14345 emit_move_insn (reg0, src);
14346 else
14347 emit_insn (gen_unaligned_loadsi (reg0, src));
14348
14349 if (dst_aligned)
14350 emit_move_insn (dst, reg0);
14351 else
14352 emit_insn (gen_unaligned_storesi (dst, reg0));
14353
14354 src = next_consecutive_mem (src);
14355 dst = next_consecutive_mem (dst);
14356 len -= 4;
14357 }
14358
14359 if (len == 0)
14360 return true;
14361
14362 /* Copy the remaining bytes. */
14363 if (len >= 2)
14364 {
14365 dst = adjust_address (dst, HImode, 0);
14366 src = adjust_address (src, HImode, 0);
14367 reg0 = gen_reg_rtx (SImode);
14368 if (src_aligned)
14369 emit_insn (gen_zero_extendhisi2 (reg0, src));
14370 else
14371 emit_insn (gen_unaligned_loadhiu (reg0, src));
14372
14373 if (dst_aligned)
14374 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14375 else
14376 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14377
14378 src = next_consecutive_mem (src);
14379 dst = next_consecutive_mem (dst);
14380 if (len == 2)
14381 return true;
14382 }
14383
14384 dst = adjust_address (dst, QImode, 0);
14385 src = adjust_address (src, QImode, 0);
14386 reg0 = gen_reg_rtx (QImode);
14387 emit_move_insn (reg0, src);
14388 emit_move_insn (dst, reg0);
14389 return true;
14390 }
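/* For illustration (an added example, not a guarantee of the exact insns
   chosen): a copy with LEN == 15 and both operands word aligned is expanded
   above as one DImode move (normally an LDRD/STRD pair), one SImode word
   copy, one HImode halfword copy and a final QImode byte copy, i.e.
   8 + 4 + 2 + 1 bytes.  Unaligned operands instead go through the
   gen_unaligned_* patterns, or fall back to arm_gen_movmemqi when neither
   side is word aligned.  */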
14391
14392 /* Select a dominance comparison mode if possible for a test of the general
14393 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14394 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14395 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14396 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14397 In all cases OP will be either EQ or NE, but we don't need to know which
14398 here. If we are unable to support a dominance comparison we return
14399 CCmode. This will then fail to match for the RTL expressions that
14400 generate this call. */
14401 machine_mode
14402 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14403 {
14404 enum rtx_code cond1, cond2;
14405 int swapped = 0;
14406
14407 /* Currently we will probably get the wrong result if the individual
14408 comparisons are not simple. This also ensures that it is safe to
14409 reverse a comparison if necessary. */
14410 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14411 != CCmode)
14412 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14413 != CCmode))
14414 return CCmode;
14415
14416 /* The if_then_else variant of this tests the second condition if the
14417 first passes, but is true if the first fails. Reverse the first
14418 condition to get a true "inclusive-or" expression. */
14419 if (cond_or == DOM_CC_NX_OR_Y)
14420 cond1 = reverse_condition (cond1);
14421
14422 /* If the comparisons are not equal, and one doesn't dominate the other,
14423 then we can't do this. */
14424 if (cond1 != cond2
14425 && !comparison_dominates_p (cond1, cond2)
14426 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14427 return CCmode;
14428
14429 if (swapped)
14430 std::swap (cond1, cond2);
14431
14432 switch (cond1)
14433 {
14434 case EQ:
14435 if (cond_or == DOM_CC_X_AND_Y)
14436 return CC_DEQmode;
14437
14438 switch (cond2)
14439 {
14440 case EQ: return CC_DEQmode;
14441 case LE: return CC_DLEmode;
14442 case LEU: return CC_DLEUmode;
14443 case GE: return CC_DGEmode;
14444 case GEU: return CC_DGEUmode;
14445 default: gcc_unreachable ();
14446 }
14447
14448 case LT:
14449 if (cond_or == DOM_CC_X_AND_Y)
14450 return CC_DLTmode;
14451
14452 switch (cond2)
14453 {
14454 case LT:
14455 return CC_DLTmode;
14456 case LE:
14457 return CC_DLEmode;
14458 case NE:
14459 return CC_DNEmode;
14460 default:
14461 gcc_unreachable ();
14462 }
14463
14464 case GT:
14465 if (cond_or == DOM_CC_X_AND_Y)
14466 return CC_DGTmode;
14467
14468 switch (cond2)
14469 {
14470 case GT:
14471 return CC_DGTmode;
14472 case GE:
14473 return CC_DGEmode;
14474 case NE:
14475 return CC_DNEmode;
14476 default:
14477 gcc_unreachable ();
14478 }
14479
14480 case LTU:
14481 if (cond_or == DOM_CC_X_AND_Y)
14482 return CC_DLTUmode;
14483
14484 switch (cond2)
14485 {
14486 case LTU:
14487 return CC_DLTUmode;
14488 case LEU:
14489 return CC_DLEUmode;
14490 case NE:
14491 return CC_DNEmode;
14492 default:
14493 gcc_unreachable ();
14494 }
14495
14496 case GTU:
14497 if (cond_or == DOM_CC_X_AND_Y)
14498 return CC_DGTUmode;
14499
14500 switch (cond2)
14501 {
14502 case GTU:
14503 return CC_DGTUmode;
14504 case GEU:
14505 return CC_DGEUmode;
14506 case NE:
14507 return CC_DNEmode;
14508 default:
14509 gcc_unreachable ();
14510 }
14511
14512 /* The remaining cases only occur when both comparisons are the
14513 same. */
14514 case NE:
14515 gcc_assert (cond1 == cond2);
14516 return CC_DNEmode;
14517
14518 case LE:
14519 gcc_assert (cond1 == cond2);
14520 return CC_DLEmode;
14521
14522 case GE:
14523 gcc_assert (cond1 == cond2);
14524 return CC_DGEmode;
14525
14526 case LEU:
14527 gcc_assert (cond1 == cond2);
14528 return CC_DLEUmode;
14529
14530 case GEU:
14531 gcc_assert (cond1 == cond2);
14532 return CC_DGEUmode;
14533
14534 default:
14535 gcc_unreachable ();
14536 }
14537 }
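/* Worked example (added for illustration): for
   (IOR (LT (reg A) (reg B)) (LE (reg C) (reg D))) tested against zero,
   cond1 = LT and cond2 = LE; LT dominates LE, so with DOM_CC_X_OR_Y the
   function above returns CC_DLEmode.  With DOM_CC_X_AND_Y and cond1 == EQ
   it would return CC_DEQmode regardless of the (dominated) cond2.  */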
14538
14539 machine_mode
14540 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14541 {
14542 /* All floating point compares return CCFP if it is an equality
14543 comparison, and CCFPE otherwise. */
14544 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14545 {
14546 switch (op)
14547 {
14548 case EQ:
14549 case NE:
14550 case UNORDERED:
14551 case ORDERED:
14552 case UNLT:
14553 case UNLE:
14554 case UNGT:
14555 case UNGE:
14556 case UNEQ:
14557 case LTGT:
14558 return CCFPmode;
14559
14560 case LT:
14561 case LE:
14562 case GT:
14563 case GE:
14564 return CCFPEmode;
14565
14566 default:
14567 gcc_unreachable ();
14568 }
14569 }
14570
14571 /* A compare with a shifted operand. Because of canonicalization, the
14572 comparison will have to be swapped when we emit the assembler. */
14573 if (GET_MODE (y) == SImode
14574 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14575 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14576 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14577 || GET_CODE (x) == ROTATERT))
14578 return CC_SWPmode;
14579
14580 /* This operation is performed swapped, but since we only rely on the Z
14581 flag we don't need an additional mode. */
14582 if (GET_MODE (y) == SImode
14583 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14584 && GET_CODE (x) == NEG
14585 && (op == EQ || op == NE))
14586 return CC_Zmode;
14587
14588 /* This is a special case that is used by combine to allow a
14589 comparison of a shifted byte load to be split into a zero-extend
14590 followed by a comparison of the shifted integer (only valid for
14591 equalities and unsigned inequalities). */
14592 if (GET_MODE (x) == SImode
14593 && GET_CODE (x) == ASHIFT
14594 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14595 && GET_CODE (XEXP (x, 0)) == SUBREG
14596 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14597 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14598 && (op == EQ || op == NE
14599 || op == GEU || op == GTU || op == LTU || op == LEU)
14600 && CONST_INT_P (y))
14601 return CC_Zmode;
14602
14603 /* A construct for a conditional compare. If the false arm contains
14604 0, then both conditions must be true; otherwise either condition
14605 must be true. Not all conditions are possible, so CCmode is
14606 returned if it can't be done. */
14607 if (GET_CODE (x) == IF_THEN_ELSE
14608 && (XEXP (x, 2) == const0_rtx
14609 || XEXP (x, 2) == const1_rtx)
14610 && COMPARISON_P (XEXP (x, 0))
14611 && COMPARISON_P (XEXP (x, 1)))
14612 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14613 INTVAL (XEXP (x, 2)));
14614
14615 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14616 if (GET_CODE (x) == AND
14617 && (op == EQ || op == NE)
14618 && COMPARISON_P (XEXP (x, 0))
14619 && COMPARISON_P (XEXP (x, 1)))
14620 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14621 DOM_CC_X_AND_Y);
14622
14623 if (GET_CODE (x) == IOR
14624 && (op == EQ || op == NE)
14625 && COMPARISON_P (XEXP (x, 0))
14626 && COMPARISON_P (XEXP (x, 1)))
14627 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14628 DOM_CC_X_OR_Y);
14629
14630 /* An operation (on Thumb) where we want to test for a single bit.
14631 This is done by shifting that bit up into the top bit of a
14632 scratch register; we can then branch on the sign bit. */
14633 if (TARGET_THUMB1
14634 && GET_MODE (x) == SImode
14635 && (op == EQ || op == NE)
14636 && GET_CODE (x) == ZERO_EXTRACT
14637 && XEXP (x, 1) == const1_rtx)
14638 return CC_Nmode;
14639
14640 /* For an operation that sets the condition codes as a side-effect, the
14641 V flag is not set correctly, so we can only use comparisons where
14642 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14643 instead.) */
14644 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14645 if (GET_MODE (x) == SImode
14646 && y == const0_rtx
14647 && (op == EQ || op == NE || op == LT || op == GE)
14648 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14649 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14650 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14651 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14652 || GET_CODE (x) == LSHIFTRT
14653 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14654 || GET_CODE (x) == ROTATERT
14655 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14656 return CC_NOOVmode;
14657
14658 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14659 return CC_Zmode;
14660
14661 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14662 && GET_CODE (x) == PLUS
14663 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14664 return CC_Cmode;
14665
14666 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14667 {
14668 switch (op)
14669 {
14670 case EQ:
14671 case NE:
14672 /* A DImode comparison against zero can be implemented by
14673 or'ing the two halves together. */
14674 if (y == const0_rtx)
14675 return CC_Zmode;
14676
14677 /* We can do an equality test in three Thumb instructions. */
14678 if (!TARGET_32BIT)
14679 return CC_Zmode;
14680
14681 /* FALLTHROUGH */
14682
14683 case LTU:
14684 case LEU:
14685 case GTU:
14686 case GEU:
14687 /* DImode unsigned comparisons can be implemented by cmp +
14688 cmpeq without a scratch register. Not worth doing in
14689 Thumb-2. */
14690 if (TARGET_32BIT)
14691 return CC_CZmode;
14692
14693 /* FALLTHROUGH */
14694
14695 case LT:
14696 case LE:
14697 case GT:
14698 case GE:
14699 /* DImode signed and unsigned comparisons can be implemented
14700 by cmp + sbcs with a scratch register, but that does not
14701 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14702 gcc_assert (op != EQ && op != NE);
14703 return CC_NCVmode;
14704
14705 default:
14706 gcc_unreachable ();
14707 }
14708 }
14709
14710 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14711 return GET_MODE (x);
14712
14713 return CCmode;
14714 }
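/* A couple of worked examples (added for illustration): comparing
   (plus (reg A) (reg B)) against (reg B) with LTU selects CC_Cmode, since
   only the carry flag is needed to detect unsigned overflow of the
   addition; a QImode equality test selects CC_Zmode; and a DImode
   equality against zero also selects CC_Zmode, because the two halves
   can simply be ORed together.  */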
14715
14716 /* X and Y are two things to compare using CODE. Emit the compare insn and
14717 return the rtx for the CC register in the proper mode. SCRATCH is an SImode
14718 scratch register needed after reload for DImode comparisons not using CC_CZmode. */
14719 rtx
14720 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14721 {
14722 machine_mode mode;
14723 rtx cc_reg;
14724 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14725
14726 /* We might have X as a constant, Y as a register because of the predicates
14727 used for cmpdi. If so, force X to a register here. */
14728 if (dimode_comparison && !REG_P (x))
14729 x = force_reg (DImode, x);
14730
14731 mode = SELECT_CC_MODE (code, x, y);
14732 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14733
14734 if (dimode_comparison
14735 && mode != CC_CZmode)
14736 {
14737 rtx clobber, set;
14738
14739 /* To compare two non-zero values for equality, XOR them and
14740 then compare against zero. Not used for ARM mode; there
14741 CC_CZmode is cheaper. */
14742 if (mode == CC_Zmode && y != const0_rtx)
14743 {
14744 gcc_assert (!reload_completed);
14745 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14746 y = const0_rtx;
14747 }
14748
14749 /* A scratch register is required. */
14750 if (reload_completed)
14751 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14752 else
14753 scratch = gen_rtx_SCRATCH (SImode);
14754
14755 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14756 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14757 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14758 }
14759 else
14760 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14761
14762 return cc_reg;
14763 }
14764
14765 /* Generate a sequence of insns that will generate the correct return
14766 address mask depending on the physical architecture that the program
14767 is running on. */
14768 rtx
14769 arm_gen_return_addr_mask (void)
14770 {
14771 rtx reg = gen_reg_rtx (Pmode);
14772
14773 emit_insn (gen_return_addr_mask (reg));
14774 return reg;
14775 }
14776
14777 void
14778 arm_reload_in_hi (rtx *operands)
14779 {
14780 rtx ref = operands[1];
14781 rtx base, scratch;
14782 HOST_WIDE_INT offset = 0;
14783
14784 if (GET_CODE (ref) == SUBREG)
14785 {
14786 offset = SUBREG_BYTE (ref);
14787 ref = SUBREG_REG (ref);
14788 }
14789
14790 if (REG_P (ref))
14791 {
14792 /* We have a pseudo which has been spilt onto the stack; there
14793 are two cases here: the first where there is a simple
14794 stack-slot replacement and a second where the stack-slot is
14795 out of range, or is used as a subreg. */
14796 if (reg_equiv_mem (REGNO (ref)))
14797 {
14798 ref = reg_equiv_mem (REGNO (ref));
14799 base = find_replacement (&XEXP (ref, 0));
14800 }
14801 else
14802 /* The slot is out of range, or was dressed up in a SUBREG. */
14803 base = reg_equiv_address (REGNO (ref));
14804
14805 /* PR 62554: If there is no equivalent memory location then just move
14806 the value as an SImode register move. This happens when the target
14807 architecture variant does not have an HImode register move. */
14808 if (base == NULL)
14809 {
14810 gcc_assert (REG_P (operands[0]));
14811 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14812 gen_rtx_SUBREG (SImode, ref, 0)));
14813 return;
14814 }
14815 }
14816 else
14817 base = find_replacement (&XEXP (ref, 0));
14818
14819 /* Handle the case where the address is too complex to be offset by 1. */
14820 if (GET_CODE (base) == MINUS
14821 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14822 {
14823 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14824
14825 emit_set_insn (base_plus, base);
14826 base = base_plus;
14827 }
14828 else if (GET_CODE (base) == PLUS)
14829 {
14830 /* The addend must be CONST_INT, or we would have dealt with it above. */
14831 HOST_WIDE_INT hi, lo;
14832
14833 offset += INTVAL (XEXP (base, 1));
14834 base = XEXP (base, 0);
14835
14836 /* Rework the address into a legal sequence of insns. */
14837 /* Valid range for lo is -4095 -> 4095 */
14838 lo = (offset >= 0
14839 ? (offset & 0xfff)
14840 : -((-offset) & 0xfff));
14841
14842 /* Corner case: if lo is the max offset then we would be out of range
14843 once we have added the additional 1 below, so bump the msb into the
14844 pre-loading insn(s). */
14845 if (lo == 4095)
14846 lo &= 0x7ff;
14847
14848 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14849 ^ (HOST_WIDE_INT) 0x80000000)
14850 - (HOST_WIDE_INT) 0x80000000);
14851
14852 gcc_assert (hi + lo == offset);
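/* Worked example of the split above (added for illustration): for
   offset == 5000, lo == 904 and hi == 4096; for offset == -5000,
   lo == -904 and hi == -4096.  For offset == 4095 the corner case fires,
   giving lo == 2047 and hi == 2048 so that the byte at offset + 1 is
   still reachable from the rebased address.  */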
14853
14854 if (hi != 0)
14855 {
14856 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14857
14858 /* Get the base address; addsi3 knows how to handle constants
14859 that require more than one insn. */
14860 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14861 base = base_plus;
14862 offset = lo;
14863 }
14864 }
14865
14866 /* Operands[2] may overlap operands[0] (though it won't overlap
14867 operands[1]); that's why we asked for a DImode reg -- so we can
14868 use the half that does not overlap. */
14869 if (REGNO (operands[2]) == REGNO (operands[0]))
14870 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14871 else
14872 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14873
14874 emit_insn (gen_zero_extendqisi2 (scratch,
14875 gen_rtx_MEM (QImode,
14876 plus_constant (Pmode, base,
14877 offset))));
14878 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14879 gen_rtx_MEM (QImode,
14880 plus_constant (Pmode, base,
14881 offset + 1))));
14882 if (!BYTES_BIG_ENDIAN)
14883 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14884 gen_rtx_IOR (SImode,
14885 gen_rtx_ASHIFT
14886 (SImode,
14887 gen_rtx_SUBREG (SImode, operands[0], 0),
14888 GEN_INT (8)),
14889 scratch));
14890 else
14891 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14892 gen_rtx_IOR (SImode,
14893 gen_rtx_ASHIFT (SImode, scratch,
14894 GEN_INT (8)),
14895 gen_rtx_SUBREG (SImode, operands[0], 0)));
14896 }
14897
14898 /* Handle storing a half-word to memory during reload by synthesizing it as two
14899 byte stores. Take care not to clobber the input values until after we
14900 have moved them somewhere safe. This code assumes that if the DImode
14901 scratch in operands[2] overlaps either the input value or output address
14902 in some way, then that value must die in this insn (we absolutely need
14903 two scratch registers for some corner cases). */
14904 void
14905 arm_reload_out_hi (rtx *operands)
14906 {
14907 rtx ref = operands[0];
14908 rtx outval = operands[1];
14909 rtx base, scratch;
14910 HOST_WIDE_INT offset = 0;
14911
14912 if (GET_CODE (ref) == SUBREG)
14913 {
14914 offset = SUBREG_BYTE (ref);
14915 ref = SUBREG_REG (ref);
14916 }
14917
14918 if (REG_P (ref))
14919 {
14920 /* We have a pseudo which has been spilt onto the stack; there
14921 are two cases here: the first where there is a simple
14922 stack-slot replacement and a second where the stack-slot is
14923 out of range, or is used as a subreg. */
14924 if (reg_equiv_mem (REGNO (ref)))
14925 {
14926 ref = reg_equiv_mem (REGNO (ref));
14927 base = find_replacement (&XEXP (ref, 0));
14928 }
14929 else
14930 /* The slot is out of range, or was dressed up in a SUBREG. */
14931 base = reg_equiv_address (REGNO (ref));
14932
14933 /* PR 62254: If there is no equivalent memory location then just move
14934 the value as an SImode register move. This happens when the target
14935 architecture variant does not have an HImode register move. */
14936 if (base == NULL)
14937 {
14938 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14939
14940 if (REG_P (outval))
14941 {
14942 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14943 gen_rtx_SUBREG (SImode, outval, 0)));
14944 }
14945 else /* SUBREG_P (outval) */
14946 {
14947 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14948 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14949 SUBREG_REG (outval)));
14950 else
14951 /* FIXME: Handle other cases ? */
14952 gcc_unreachable ();
14953 }
14954 return;
14955 }
14956 }
14957 else
14958 base = find_replacement (&XEXP (ref, 0));
14959
14960 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14961
14962 /* Handle the case where the address is too complex to be offset by 1. */
14963 if (GET_CODE (base) == MINUS
14964 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14965 {
14966 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14967
14968 /* Be careful not to destroy OUTVAL. */
14969 if (reg_overlap_mentioned_p (base_plus, outval))
14970 {
14971 /* Updating base_plus might destroy outval, see if we can
14972 swap the scratch and base_plus. */
14973 if (!reg_overlap_mentioned_p (scratch, outval))
14974 std::swap (scratch, base_plus);
14975 else
14976 {
14977 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14978
14979 /* Be conservative and copy OUTVAL into the scratch now,
14980 this should only be necessary if outval is a subreg
14981 of something larger than a word. */
14982 /* XXX Might this clobber base? I can't see how it can,
14983 since scratch is known to overlap with OUTVAL, and
14984 must be wider than a word. */
14985 emit_insn (gen_movhi (scratch_hi, outval));
14986 outval = scratch_hi;
14987 }
14988 }
14989
14990 emit_set_insn (base_plus, base);
14991 base = base_plus;
14992 }
14993 else if (GET_CODE (base) == PLUS)
14994 {
14995 /* The addend must be CONST_INT, or we would have dealt with it above. */
14996 HOST_WIDE_INT hi, lo;
14997
14998 offset += INTVAL (XEXP (base, 1));
14999 base = XEXP (base, 0);
15000
15001 /* Rework the address into a legal sequence of insns. */
15002 /* Valid range for lo is -4095 -> 4095 */
15003 lo = (offset >= 0
15004 ? (offset & 0xfff)
15005 : -((-offset) & 0xfff));
15006
15007 /* Corner case: if lo is the max offset then we would be out of range
15008 once we have added the additional 1 below, so bump the msb into the
15009 pre-loading insn(s). */
15010 if (lo == 4095)
15011 lo &= 0x7ff;
15012
15013 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15014 ^ (HOST_WIDE_INT) 0x80000000)
15015 - (HOST_WIDE_INT) 0x80000000);
15016
15017 gcc_assert (hi + lo == offset);
15018
15019 if (hi != 0)
15020 {
15021 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15022
15023 /* Be careful not to destroy OUTVAL. */
15024 if (reg_overlap_mentioned_p (base_plus, outval))
15025 {
15026 /* Updating base_plus might destroy outval, see if we
15027 can swap the scratch and base_plus. */
15028 if (!reg_overlap_mentioned_p (scratch, outval))
15029 std::swap (scratch, base_plus);
15030 else
15031 {
15032 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15033
15034 /* Be conservative and copy outval into scratch now,
15035 this should only be necessary if outval is a
15036 subreg of something larger than a word. */
15037 /* XXX Might this clobber base? I can't see how it
15038 can, since scratch is known to overlap with
15039 outval. */
15040 emit_insn (gen_movhi (scratch_hi, outval));
15041 outval = scratch_hi;
15042 }
15043 }
15044
15045 /* Get the base address; addsi3 knows how to handle constants
15046 that require more than one insn. */
15047 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15048 base = base_plus;
15049 offset = lo;
15050 }
15051 }
15052
15053 if (BYTES_BIG_ENDIAN)
15054 {
15055 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15056 plus_constant (Pmode, base,
15057 offset + 1)),
15058 gen_lowpart (QImode, outval)));
15059 emit_insn (gen_lshrsi3 (scratch,
15060 gen_rtx_SUBREG (SImode, outval, 0),
15061 GEN_INT (8)));
15062 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15063 offset)),
15064 gen_lowpart (QImode, scratch)));
15065 }
15066 else
15067 {
15068 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15069 offset)),
15070 gen_lowpart (QImode, outval)));
15071 emit_insn (gen_lshrsi3 (scratch,
15072 gen_rtx_SUBREG (SImode, outval, 0),
15073 GEN_INT (8)));
15074 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15075 plus_constant (Pmode, base,
15076 offset + 1)),
15077 gen_lowpart (QImode, scratch)));
15078 }
15079 }
15080
15081 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15082 (padded to the size of a word) should be passed in a register. */
15083
15084 static bool
15085 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15086 {
15087 if (TARGET_AAPCS_BASED)
15088 return must_pass_in_stack_var_size (mode, type);
15089 else
15090 return must_pass_in_stack_var_size_or_pad (mode, type);
15091 }
15092
15093
15094 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15095 Return true if an argument passed on the stack should be padded upwards,
15096 i.e. if the least-significant byte has useful data.
15097 For legacy APCS ABIs we use the default. For AAPCS-based ABIs small
15098 aggregate types are placed at the lowest memory address. */
15099
15100 bool
15101 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15102 {
15103 if (!TARGET_AAPCS_BASED)
15104 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15105
15106 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15107 return false;
15108
15109 return true;
15110 }
15111
15112
15113 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15114 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15115 register has useful data, and return the opposite if the most
15116 significant byte does. */
15117
15118 bool
15119 arm_pad_reg_upward (machine_mode mode,
15120 tree type, int first ATTRIBUTE_UNUSED)
15121 {
15122 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15123 {
15124 /* For AAPCS, small aggregates, small fixed-point types,
15125 and small complex types are always padded upwards. */
15126 if (type)
15127 {
15128 if ((AGGREGATE_TYPE_P (type)
15129 || TREE_CODE (type) == COMPLEX_TYPE
15130 || FIXED_POINT_TYPE_P (type))
15131 && int_size_in_bytes (type) <= 4)
15132 return true;
15133 }
15134 else
15135 {
15136 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15137 && GET_MODE_SIZE (mode) <= 4)
15138 return true;
15139 }
15140 }
15141
15142 /* Otherwise, use default padding. */
15143 return !BYTES_BIG_ENDIAN;
15144 }
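/* For illustration: on an AAPCS big-endian target a 3-byte aggregate
   (int_size_in_bytes <= 4) is padded upwards and the function returns
   true, whereas a scalar with no type information falls through to the
   default of !BYTES_BIG_ENDIAN, i.e. false on big-endian.  */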
15145
15146 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15147 assuming that the address in the base register is word aligned. */
15148 bool
15149 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15150 {
15151 HOST_WIDE_INT max_offset;
15152
15153 /* Offset must be a multiple of 4 in Thumb mode. */
15154 if (TARGET_THUMB2 && ((offset & 3) != 0))
15155 return false;
15156
15157 if (TARGET_THUMB2)
15158 max_offset = 1020;
15159 else if (TARGET_ARM)
15160 max_offset = 255;
15161 else
15162 return false;
15163
15164 return ((offset <= max_offset) && (offset >= -max_offset));
15165 }
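/* Example values (added for illustration): an offset of 1020 is accepted
   for Thumb-2 but rejected for ARM (limit 255); an offset of -2 is
   accepted for ARM but rejected for Thumb-2 (not a multiple of 4); and
   Thumb-1 always gets false here.  */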
15166
15167 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15168 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15169 Assumes that the address in the base register RN is word aligned. Pattern
15170 guarantees that both memory accesses use the same base register,
15171 the offsets are constants within the range, and the gap between the offsets is 4.
15172 If reload is complete then check that registers are legal. WBACK indicates whether
15173 address is updated. LOAD indicates whether memory access is load or store. */
15174 bool
15175 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15176 bool wback, bool load)
15177 {
15178 unsigned int t, t2, n;
15179
15180 if (!reload_completed)
15181 return true;
15182
15183 if (!offset_ok_for_ldrd_strd (offset))
15184 return false;
15185
15186 t = REGNO (rt);
15187 t2 = REGNO (rt2);
15188 n = REGNO (rn);
15189
15190 if ((TARGET_THUMB2)
15191 && ((wback && (n == t || n == t2))
15192 || (t == SP_REGNUM)
15193 || (t == PC_REGNUM)
15194 || (t2 == SP_REGNUM)
15195 || (t2 == PC_REGNUM)
15196 || (!load && (n == PC_REGNUM))
15197 || (load && (t == t2))
15198 /* Triggers Cortex-M3 LDRD errata. */
15199 || (!wback && load && fix_cm3_ldrd && (n == t))))
15200 return false;
15201
15202 if ((TARGET_ARM)
15203 && ((wback && (n == t || n == t2))
15204 || (t2 == PC_REGNUM)
15205 || (t % 2 != 0) /* First destination register is not even. */
15206 || (t2 != t + 1)
15207 /* PC can be used as base register (for offset addressing only),
15208 but it is deprecated. */
15209 || (n == PC_REGNUM)))
15210 return false;
15211
15212 return true;
15213 }
15214
15215 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15216 operand MEM's address contains an immediate offset from the base
15217 register and has no side effects, in which case it sets BASE and
15218 OFFSET accordingly. */
15219 static bool
15220 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15221 {
15222 rtx addr;
15223
15224 gcc_assert (base != NULL && offset != NULL);
15225
15226 /* TODO: Handle more general memory operand patterns, such as
15227 PRE_DEC and PRE_INC. */
15228
15229 if (side_effects_p (mem))
15230 return false;
15231
15232 /* Can't deal with subregs. */
15233 if (GET_CODE (mem) == SUBREG)
15234 return false;
15235
15236 gcc_assert (MEM_P (mem));
15237
15238 *offset = const0_rtx;
15239
15240 addr = XEXP (mem, 0);
15241
15242 /* If addr isn't valid for DImode, then we can't handle it. */
15243 if (!arm_legitimate_address_p (DImode, addr,
15244 reload_in_progress || reload_completed))
15245 return false;
15246
15247 if (REG_P (addr))
15248 {
15249 *base = addr;
15250 return true;
15251 }
15252 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15253 {
15254 *base = XEXP (addr, 0);
15255 *offset = XEXP (addr, 1);
15256 return (REG_P (*base) && CONST_INT_P (*offset));
15257 }
15258
15259 return false;
15260 }
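/* For illustration: a MEM whose address is (reg Rn) yields *BASE == Rn and
   *OFFSET == 0; an address of (plus (reg Rn) (const_int 8)) yields
   *BASE == Rn and *OFFSET == 8; an operand that is a SUBREG rather than a
   plain MEM is rejected, as is any address with side effects such as
   POST_INC.  */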
15261
15262 /* Called from a peephole2 to replace two word-size accesses with a
15263 single LDRD/STRD instruction. Returns true iff we can generate a
15264 new instruction sequence. That is, both accesses use the same base
15265 register and the gap between constant offsets is 4. This function
15266 may reorder its operands to match ldrd/strd RTL templates.
15267 OPERANDS are the operands found by the peephole matcher;
15268 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15269 corresponding memory operands. LOAD indicates whether the access
15270 is load or store. CONST_STORE indicates a store of constant
15271 integer values held in OPERANDS[4,5] and assumes that the pattern
15272 is 4 insns long, for the purpose of checking dead registers.
15273 COMMUTE indicates that register operands may be reordered. */
15274 bool
15275 gen_operands_ldrd_strd (rtx *operands, bool load,
15276 bool const_store, bool commute)
15277 {
15278 int nops = 2;
15279 HOST_WIDE_INT offsets[2], offset;
15280 rtx base = NULL_RTX;
15281 rtx cur_base, cur_offset, tmp;
15282 int i, gap;
15283 HARD_REG_SET regset;
15284
15285 gcc_assert (!const_store || !load);
15286 /* Check that the memory references are immediate offsets from the
15287 same base register. Extract the base register, the destination
15288 registers, and the corresponding memory offsets. */
15289 for (i = 0; i < nops; i++)
15290 {
15291 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15292 return false;
15293
15294 if (i == 0)
15295 base = cur_base;
15296 else if (REGNO (base) != REGNO (cur_base))
15297 return false;
15298
15299 offsets[i] = INTVAL (cur_offset);
15300 if (GET_CODE (operands[i]) == SUBREG)
15301 {
15302 tmp = SUBREG_REG (operands[i]);
15303 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15304 operands[i] = tmp;
15305 }
15306 }
15307
15308 /* Make sure there is no dependency between the individual loads. */
15309 if (load && REGNO (operands[0]) == REGNO (base))
15310 return false; /* RAW */
15311
15312 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15313 return false; /* WAW */
15314
15315 /* If the same input register is used in both stores
15316 when storing different constants, try to find a free register.
15317 For example, the code
15318 mov r0, 0
15319 str r0, [r2]
15320 mov r0, 1
15321 str r0, [r2, #4]
15322 can be transformed into
15323 mov r1, 0
15324 mov r0, 1
15325 strd r1, r0, [r2]
15326 in Thumb mode assuming that r1 is free.
15327 For ARM mode do the same but only if the starting register
15328 can be made to be even. */
15329 if (const_store
15330 && REGNO (operands[0]) == REGNO (operands[1])
15331 && INTVAL (operands[4]) != INTVAL (operands[5]))
15332 {
15333 if (TARGET_THUMB2)
15334 {
15335 CLEAR_HARD_REG_SET (regset);
15336 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15337 if (tmp == NULL_RTX)
15338 return false;
15339
15340 /* Use the new register in the first load to ensure that
15341 if the original input register is not dead after peephole,
15342 then it will have the correct constant value. */
15343 operands[0] = tmp;
15344 }
15345 else if (TARGET_ARM)
15346 {
15347 int regno = REGNO (operands[0]);
15348 if (!peep2_reg_dead_p (4, operands[0]))
15349 {
15350 /* When the input register is even and is not dead after the
15351 pattern, it has to hold the second constant but we cannot
15352 form a legal STRD in ARM mode with this register as the second
15353 register. */
15354 if (regno % 2 == 0)
15355 return false;
15356
15357 /* Is regno-1 free? */
15358 SET_HARD_REG_SET (regset);
15359 CLEAR_HARD_REG_BIT(regset, regno - 1);
15360 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15361 if (tmp == NULL_RTX)
15362 return false;
15363
15364 operands[0] = tmp;
15365 }
15366 else
15367 {
15368 /* Find a DImode register. */
15369 CLEAR_HARD_REG_SET (regset);
15370 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15371 if (tmp != NULL_RTX)
15372 {
15373 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15374 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15375 }
15376 else
15377 {
15378 /* Can we use the input register to form a DI register? */
15379 SET_HARD_REG_SET (regset);
15380 CLEAR_HARD_REG_BIT(regset,
15381 regno % 2 == 0 ? regno + 1 : regno - 1);
15382 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15383 if (tmp == NULL_RTX)
15384 return false;
15385 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15386 }
15387 }
15388
15389 gcc_assert (operands[0] != NULL_RTX);
15390 gcc_assert (operands[1] != NULL_RTX);
15391 gcc_assert (REGNO (operands[0]) % 2 == 0);
15392 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15393 }
15394 }
15395
15396 /* Make sure the instructions are ordered with lower memory access first. */
15397 if (offsets[0] > offsets[1])
15398 {
15399 gap = offsets[0] - offsets[1];
15400 offset = offsets[1];
15401
15402 /* Swap the instructions such that lower memory is accessed first. */
15403 std::swap (operands[0], operands[1]);
15404 std::swap (operands[2], operands[3]);
15405 if (const_store)
15406 std::swap (operands[4], operands[5]);
15407 }
15408 else
15409 {
15410 gap = offsets[1] - offsets[0];
15411 offset = offsets[0];
15412 }
15413
15414 /* Make sure accesses are to consecutive memory locations. */
15415 if (gap != 4)
15416 return false;
15417
15418 /* Make sure we generate legal instructions. */
15419 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15420 false, load))
15421 return true;
15422
15423 /* In Thumb state, where registers are almost unconstrained, there
15424 is little hope of fixing it. */
15425 if (TARGET_THUMB2)
15426 return false;
15427
15428 if (load && commute)
15429 {
15430 /* Try reordering registers. */
15431 std::swap (operands[0], operands[1]);
15432 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15433 false, load))
15434 return true;
15435 }
15436
15437 if (const_store)
15438 {
15439 /* If input registers are dead after this pattern, they can be
15440 reordered or replaced by other registers that are free in the
15441 current pattern. */
15442 if (!peep2_reg_dead_p (4, operands[0])
15443 || !peep2_reg_dead_p (4, operands[1]))
15444 return false;
15445
15446 /* Try to reorder the input registers. */
15447 /* For example, the code
15448 mov r0, 0
15449 mov r1, 1
15450 str r1, [r2]
15451 str r0, [r2, #4]
15452 can be transformed into
15453 mov r1, 0
15454 mov r0, 1
15455 strd r0, [r2]
15456 */
15457 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15458 false, false))
15459 {
15460 std::swap (operands[0], operands[1]);
15461 return true;
15462 }
15463
15464 /* Try to find a free DI register. */
15465 CLEAR_HARD_REG_SET (regset);
15466 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15467 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15468 while (true)
15469 {
15470 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15471 if (tmp == NULL_RTX)
15472 return false;
15473
15474 /* DREG must be an even-numbered register in DImode.
15475 Split it into SI registers. */
15476 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15477 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15478 gcc_assert (operands[0] != NULL_RTX);
15479 gcc_assert (operands[1] != NULL_RTX);
15480 gcc_assert (REGNO (operands[0]) % 2 == 0);
15481 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15482
15483 return (operands_ok_ldrd_strd (operands[0], operands[1],
15484 base, offset,
15485 false, load));
15486 }
15487 }
15488
15489 return false;
15490 }
15491
15492
15493
15494 \f
15495 /* Print a symbolic form of X to the debug file, F. */
15496 static void
15497 arm_print_value (FILE *f, rtx x)
15498 {
15499 switch (GET_CODE (x))
15500 {
15501 case CONST_INT:
15502 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15503 return;
15504
15505 case CONST_DOUBLE:
15506 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15507 return;
15508
15509 case CONST_VECTOR:
15510 {
15511 int i;
15512
15513 fprintf (f, "<");
15514 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15515 {
15516 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15517 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15518 fputc (',', f);
15519 }
15520 fprintf (f, ">");
15521 }
15522 return;
15523
15524 case CONST_STRING:
15525 fprintf (f, "\"%s\"", XSTR (x, 0));
15526 return;
15527
15528 case SYMBOL_REF:
15529 fprintf (f, "`%s'", XSTR (x, 0));
15530 return;
15531
15532 case LABEL_REF:
15533 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15534 return;
15535
15536 case CONST:
15537 arm_print_value (f, XEXP (x, 0));
15538 return;
15539
15540 case PLUS:
15541 arm_print_value (f, XEXP (x, 0));
15542 fprintf (f, "+");
15543 arm_print_value (f, XEXP (x, 1));
15544 return;
15545
15546 case PC:
15547 fprintf (f, "pc");
15548 return;
15549
15550 default:
15551 fprintf (f, "????");
15552 return;
15553 }
15554 }
15555 \f
15556 /* Routines for manipulation of the constant pool. */
15557
15558 /* Arm instructions cannot load a large constant directly into a
15559 register; they have to come from a pc relative load. The constant
15560 must therefore be placed in the addressable range of the pc
15561 relative load. Depending on the precise pc relative load
15562 instruction the range is somewhere between 256 bytes and 4k. This
15563 means that we often have to dump a constant inside a function, and
15564 generate code to branch around it.
15565
15566 It is important to minimize this, since the branches will slow
15567 things down and make the code larger.
15568
15569 Normally we can hide the table after an existing unconditional
15570 branch so that there is no interruption of the flow, but in the
15571 worst case the code looks like this:
15572
15573 ldr rn, L1
15574 ...
15575 b L2
15576 align
15577 L1: .long value
15578 L2:
15579 ...
15580
15581 ldr rn, L3
15582 ...
15583 b L4
15584 align
15585 L3: .long value
15586 L4:
15587 ...
15588
15589 We fix this by performing a scan after scheduling, which notices
15590 which instructions need to have their operands fetched from the
15591 constant table and builds the table.
15592
15593 The algorithm starts by building a table of all the constants that
15594 need fixing up and all the natural barriers in the function (places
15595 where a constant table can be dropped without breaking the flow).
15596 For each fixup we note how far the pc-relative replacement will be
15597 able to reach and the offset of the instruction into the function.
15598
15599 Having built the table we then group the fixes together to form
15600 tables that are as large as possible (subject to addressing
15601 constraints) and emit each table of constants after the last
15602 barrier that is within range of all the instructions in the group.
15603 If a group does not contain a barrier, then we forcibly create one
15604 by inserting a jump instruction into the flow. Once the table has
15605 been inserted, the insns are then modified to reference the
15606 relevant entry in the pool.
15607
15608 Possible enhancements to the algorithm (not implemented) are:
15609
15610 1) For some processors and object formats, there may be benefit in
15611 aligning the pools to the start of cache lines; this alignment
15612 would need to be taken into account when calculating addressability
15613 of a pool. */
15614
15615 /* These typedefs are located at the start of this file, so that
15616 they can be used in the prototypes there. This comment is to
15617 remind readers of that fact so that the following structures
15618 can be understood more easily.
15619
15620 typedef struct minipool_node Mnode;
15621 typedef struct minipool_fixup Mfix; */
15622
15623 struct minipool_node
15624 {
15625 /* Doubly linked chain of entries. */
15626 Mnode * next;
15627 Mnode * prev;
15628 /* The maximum offset into the code at which this entry can be placed. While
15629 pushing fixes for forward references, all entries are sorted in order
15630 of increasing max_address. */
15631 HOST_WIDE_INT max_address;
15632 /* Similarly for an entry inserted for a backwards ref. */
15633 HOST_WIDE_INT min_address;
15634 /* The number of fixes referencing this entry. This can become zero
15635 if we "unpush" an entry. In this case we ignore the entry when we
15636 come to emit the code. */
15637 int refcount;
15638 /* The offset from the start of the minipool. */
15639 HOST_WIDE_INT offset;
15640 /* The value in table. */
15641 rtx value;
15642 /* The mode of value. */
15643 machine_mode mode;
15644 /* The size of the value. With iWMMXt enabled
15645 sizes > 4 also imply an alignment of 8 bytes. */
15646 int fix_size;
15647 };
15648
15649 struct minipool_fixup
15650 {
15651 Mfix * next;
15652 rtx_insn * insn;
15653 HOST_WIDE_INT address;
15654 rtx * loc;
15655 machine_mode mode;
15656 int fix_size;
15657 rtx value;
15658 Mnode * minipool;
15659 HOST_WIDE_INT forwards;
15660 HOST_WIDE_INT backwards;
15661 };
15662
15663 /* Fixes less than a word need padding out to a word boundary. */
15664 #define MINIPOOL_FIX_SIZE(mode) \
15665 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
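/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */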
15666
15667 static Mnode * minipool_vector_head;
15668 static Mnode * minipool_vector_tail;
15669 static rtx_code_label *minipool_vector_label;
15670 static int minipool_pad;
15671
15672 /* The linked list of all minipool fixes required for this function. */
15673 Mfix * minipool_fix_head;
15674 Mfix * minipool_fix_tail;
15675 /* The fix entry for the current minipool, once it has been placed. */
15676 Mfix * minipool_barrier;
15677
15678 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15679 #define JUMP_TABLES_IN_TEXT_SECTION 0
15680 #endif
15681
15682 static HOST_WIDE_INT
15683 get_jump_table_size (rtx_jump_table_data *insn)
15684 {
15685 /* ADDR_VECs only take room if read-only data goes into the text
15686 section. */
15687 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15688 {
15689 rtx body = PATTERN (insn);
15690 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15691 HOST_WIDE_INT size;
15692 HOST_WIDE_INT modesize;
15693
15694 modesize = GET_MODE_SIZE (GET_MODE (body));
15695 size = modesize * XVECLEN (body, elt);
15696 switch (modesize)
15697 {
15698 case 1:
15699 /* Round up size of TBB table to a halfword boundary. */
15700 size = (size + 1) & ~HOST_WIDE_INT_1;
15701 break;
15702 case 2:
15703 /* No padding necessary for TBH. */
15704 break;
15705 case 4:
15706 /* Add two bytes for alignment on Thumb. */
15707 if (TARGET_THUMB)
15708 size += 2;
15709 break;
15710 default:
15711 gcc_unreachable ();
15712 }
15713 return size;
15714 }
15715
15716 return 0;
15717 }
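/* For illustration: a QImode ADDR_DIFF_VEC (a Thumb-2 TBB table) with five
   entries occupies 5 bytes, rounded up above to 6; an SImode table with
   five entries occupies 20 bytes, plus 2 on Thumb for alignment; and the
   function returns 0 whenever jump tables are not emitted into the text
   section.  */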
15718
15719 /* Return the maximum amount of padding that will be inserted before
15720 label LABEL. */
15721
15722 static HOST_WIDE_INT
15723 get_label_padding (rtx label)
15724 {
15725 HOST_WIDE_INT align, min_insn_size;
15726
15727 align = 1 << label_to_alignment (label);
15728 min_insn_size = TARGET_THUMB ? 2 : 4;
15729 return align > min_insn_size ? align - min_insn_size : 0;
15730 }
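/* For illustration: a label aligned to 1 << 3 == 8 bytes may be preceded
   by up to 6 bytes of padding in Thumb mode (minimum insn size 2) or up
   to 4 bytes in ARM mode (minimum insn size 4).  */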
15731
15732 /* Move a minipool fix MP from its current location to before MAX_MP.
15733 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15734 constraints may need updating. */
15735 static Mnode *
15736 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15737 HOST_WIDE_INT max_address)
15738 {
15739 /* The code below assumes these are different. */
15740 gcc_assert (mp != max_mp);
15741
15742 if (max_mp == NULL)
15743 {
15744 if (max_address < mp->max_address)
15745 mp->max_address = max_address;
15746 }
15747 else
15748 {
15749 if (max_address > max_mp->max_address - mp->fix_size)
15750 mp->max_address = max_mp->max_address - mp->fix_size;
15751 else
15752 mp->max_address = max_address;
15753
15754 /* Unlink MP from its current position. Since max_mp is non-null,
15755 mp->prev must be non-null. */
15756 mp->prev->next = mp->next;
15757 if (mp->next != NULL)
15758 mp->next->prev = mp->prev;
15759 else
15760 minipool_vector_tail = mp->prev;
15761
15762 /* Re-insert it before MAX_MP. */
15763 mp->next = max_mp;
15764 mp->prev = max_mp->prev;
15765 max_mp->prev = mp;
15766
15767 if (mp->prev != NULL)
15768 mp->prev->next = mp;
15769 else
15770 minipool_vector_head = mp;
15771 }
15772
15773 /* Save the new entry. */
15774 max_mp = mp;
15775
15776 /* Scan over the preceding entries and adjust their addresses as
15777 required. */
15778 while (mp->prev != NULL
15779 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15780 {
15781 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15782 mp = mp->prev;
15783 }
15784
15785 return max_mp;
15786 }
15787
15788 /* Add a constant to the minipool for a forward reference. Returns the
15789 node added or NULL if the constant will not fit in this pool. */
15790 static Mnode *
15791 add_minipool_forward_ref (Mfix *fix)
15792 {
15793 /* If set, max_mp is the first pool_entry that has a lower
15794 constraint than the one we are trying to add. */
15795 Mnode * max_mp = NULL;
15796 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15797 Mnode * mp;
15798
15799 /* If the minipool starts before the end of FIX->INSN then this FIX
15800 cannot be placed into the current pool. Furthermore, adding the
15801 new constant pool entry may cause the pool to start FIX_SIZE bytes
15802 earlier. */
15803 if (minipool_vector_head &&
15804 (fix->address + get_attr_length (fix->insn)
15805 >= minipool_vector_head->max_address - fix->fix_size))
15806 return NULL;
15807
15808 /* Scan the pool to see if a constant with the same value has
15809 already been added. While we are doing this, also note the
15810 location where we must insert the constant if it doesn't already
15811 exist. */
15812 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15813 {
15814 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15815 && fix->mode == mp->mode
15816 && (!LABEL_P (fix->value)
15817 || (CODE_LABEL_NUMBER (fix->value)
15818 == CODE_LABEL_NUMBER (mp->value)))
15819 && rtx_equal_p (fix->value, mp->value))
15820 {
15821 /* More than one fix references this entry. */
15822 mp->refcount++;
15823 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15824 }
15825
15826 /* Note the insertion point if necessary. */
15827 if (max_mp == NULL
15828 && mp->max_address > max_address)
15829 max_mp = mp;
15830
15831 /* If we are inserting an 8-byte aligned quantity and
15832 we have not already found an insertion point, then
15833 make sure that all such 8-byte aligned quantities are
15834 placed at the start of the pool. */
15835 if (ARM_DOUBLEWORD_ALIGN
15836 && max_mp == NULL
15837 && fix->fix_size >= 8
15838 && mp->fix_size < 8)
15839 {
15840 max_mp = mp;
15841 max_address = mp->max_address;
15842 }
15843 }
15844
15845 /* The value is not currently in the minipool, so we need to create
15846 a new entry for it. If MAX_MP is NULL, the entry will be put on
15847 the end of the list since the placement is less constrained than
15848 any existing entry. Otherwise, we insert the new fix before
15849 MAX_MP and, if necessary, adjust the constraints on the other
15850 entries. */
15851 mp = XNEW (Mnode);
15852 mp->fix_size = fix->fix_size;
15853 mp->mode = fix->mode;
15854 mp->value = fix->value;
15855 mp->refcount = 1;
15856 /* Not yet required for a backwards ref. */
15857 mp->min_address = -65536;
15858
15859 if (max_mp == NULL)
15860 {
15861 mp->max_address = max_address;
15862 mp->next = NULL;
15863 mp->prev = minipool_vector_tail;
15864
15865 if (mp->prev == NULL)
15866 {
15867 minipool_vector_head = mp;
15868 minipool_vector_label = gen_label_rtx ();
15869 }
15870 else
15871 mp->prev->next = mp;
15872
15873 minipool_vector_tail = mp;
15874 }
15875 else
15876 {
15877 if (max_address > max_mp->max_address - mp->fix_size)
15878 mp->max_address = max_mp->max_address - mp->fix_size;
15879 else
15880 mp->max_address = max_address;
15881
15882 mp->next = max_mp;
15883 mp->prev = max_mp->prev;
15884 max_mp->prev = mp;
15885 if (mp->prev != NULL)
15886 mp->prev->next = mp;
15887 else
15888 minipool_vector_head = mp;
15889 }
15890
15891 /* Save the new entry. */
15892 max_mp = mp;
15893
15894 /* Scan over the preceding entries and adjust their addresses as
15895 required. */
15896 while (mp->prev != NULL
15897 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15898 {
15899 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15900 mp = mp->prev;
15901 }
15902
15903 return max_mp;
15904 }
15905
15906 static Mnode *
15907 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15908 HOST_WIDE_INT min_address)
15909 {
15910 HOST_WIDE_INT offset;
15911
15912 /* The code below assumes these are different. */
15913 gcc_assert (mp != min_mp);
15914
15915 if (min_mp == NULL)
15916 {
15917 if (min_address > mp->min_address)
15918 mp->min_address = min_address;
15919 }
15920 else
15921 {
15922 /* We will adjust this below if it is too loose. */
15923 mp->min_address = min_address;
15924
15925 /* Unlink MP from its current position. Since min_mp is non-null,
15926 mp->next must be non-null. */
15927 mp->next->prev = mp->prev;
15928 if (mp->prev != NULL)
15929 mp->prev->next = mp->next;
15930 else
15931 minipool_vector_head = mp->next;
15932
15933 /* Reinsert it after MIN_MP. */
15934 mp->prev = min_mp;
15935 mp->next = min_mp->next;
15936 min_mp->next = mp;
15937 if (mp->next != NULL)
15938 mp->next->prev = mp;
15939 else
15940 minipool_vector_tail = mp;
15941 }
15942
15943 min_mp = mp;
15944
15945 offset = 0;
15946 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15947 {
15948 mp->offset = offset;
15949 if (mp->refcount > 0)
15950 offset += mp->fix_size;
15951
15952 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15953 mp->next->min_address = mp->min_address + mp->fix_size;
15954 }
15955
15956 return min_mp;
15957 }
15958
15959 /* Add a constant to the minipool for a backward reference. Returns the
15960 node added or NULL if the constant will not fit in this pool.
15961
15962 Note that the code for insertion for a backwards reference can be
15963 somewhat confusing because the calculated offsets for each fix do
15964 not take into account the size of the pool (which is still under
15965 construction). */
15966 static Mnode *
15967 add_minipool_backward_ref (Mfix *fix)
15968 {
15969 /* If set, min_mp is the last pool_entry that has a lower constraint
15970 than the one we are trying to add. */
15971 Mnode *min_mp = NULL;
15972 /* This can be negative, since it is only a constraint. */
15973 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15974 Mnode *mp;
15975
15976 /* If we can't reach the current pool from this insn, or if we can't
15977 insert this entry at the end of the pool without pushing other
15978 fixes out of range, then we don't try. This ensures that we
15979 can't fail later on. */
15980 if (min_address >= minipool_barrier->address
15981 || (minipool_vector_tail->min_address + fix->fix_size
15982 >= minipool_barrier->address))
15983 return NULL;
15984
15985 /* Scan the pool to see if a constant with the same value has
15986 already been added. While we are doing this, also note the
15987 location where we must insert the constant if it doesn't already
15988 exist. */
15989 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15990 {
15991 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15992 && fix->mode == mp->mode
15993 && (!LABEL_P (fix->value)
15994 || (CODE_LABEL_NUMBER (fix->value)
15995 == CODE_LABEL_NUMBER (mp->value)))
15996 && rtx_equal_p (fix->value, mp->value)
15997 /* Check that there is enough slack to move this entry to the
15998 end of the table (this is conservative). */
15999 && (mp->max_address
16000 > (minipool_barrier->address
16001 + minipool_vector_tail->offset
16002 + minipool_vector_tail->fix_size)))
16003 {
16004 mp->refcount++;
16005 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16006 }
16007
16008 if (min_mp != NULL)
16009 mp->min_address += fix->fix_size;
16010 else
16011 {
16012 /* Note the insertion point if necessary. */
16013 if (mp->min_address < min_address)
16014 {
16015 /* For now, we do not allow the insertion of nodes requiring
16016 8-byte alignment anywhere but at the start of the pool. */
16017 if (ARM_DOUBLEWORD_ALIGN
16018 && fix->fix_size >= 8 && mp->fix_size < 8)
16019 return NULL;
16020 else
16021 min_mp = mp;
16022 }
16023 else if (mp->max_address
16024 < minipool_barrier->address + mp->offset + fix->fix_size)
16025 {
16026 /* Inserting before this entry would push the fix beyond
16027 its maximum address (which can happen if we have
16028 re-located a forwards fix); force the new fix to come
16029 after it. */
16030 if (ARM_DOUBLEWORD_ALIGN
16031 && fix->fix_size >= 8 && mp->fix_size < 8)
16032 return NULL;
16033 else
16034 {
16035 min_mp = mp;
16036 min_address = mp->min_address + fix->fix_size;
16037 }
16038 }
16039 /* Do not insert a non-8-byte aligned quantity before 8-byte
16040 aligned quantities. */
16041 else if (ARM_DOUBLEWORD_ALIGN
16042 && fix->fix_size < 8
16043 && mp->fix_size >= 8)
16044 {
16045 min_mp = mp;
16046 min_address = mp->min_address + fix->fix_size;
16047 }
16048 }
16049 }
16050
16051 /* We need to create a new entry. */
16052 mp = XNEW (Mnode);
16053 mp->fix_size = fix->fix_size;
16054 mp->mode = fix->mode;
16055 mp->value = fix->value;
16056 mp->refcount = 1;
16057 mp->max_address = minipool_barrier->address + 65536;
16058
16059 mp->min_address = min_address;
16060
16061 if (min_mp == NULL)
16062 {
16063 mp->prev = NULL;
16064 mp->next = minipool_vector_head;
16065
16066 if (mp->next == NULL)
16067 {
16068 minipool_vector_tail = mp;
16069 minipool_vector_label = gen_label_rtx ();
16070 }
16071 else
16072 mp->next->prev = mp;
16073
16074 minipool_vector_head = mp;
16075 }
16076 else
16077 {
16078 mp->next = min_mp->next;
16079 mp->prev = min_mp;
16080 min_mp->next = mp;
16081
16082 if (mp->next != NULL)
16083 mp->next->prev = mp;
16084 else
16085 minipool_vector_tail = mp;
16086 }
16087
16088 /* Save the new entry. */
16089 min_mp = mp;
16090
16091 if (mp->prev)
16092 mp = mp->prev;
16093 else
16094 mp->offset = 0;
16095
16096 /* Scan over the following entries and adjust their offsets. */
16097 while (mp->next != NULL)
16098 {
16099 if (mp->next->min_address < mp->min_address + mp->fix_size)
16100 mp->next->min_address = mp->min_address + mp->fix_size;
16101
16102 if (mp->refcount)
16103 mp->next->offset = mp->offset + mp->fix_size;
16104 else
16105 mp->next->offset = mp->offset;
16106
16107 mp = mp->next;
16108 }
16109
16110 return min_mp;
16111 }
16112
16113 static void
16114 assign_minipool_offsets (Mfix *barrier)
16115 {
16116 HOST_WIDE_INT offset = 0;
16117 Mnode *mp;
16118
16119 minipool_barrier = barrier;
16120
16121 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16122 {
16123 mp->offset = offset;
16124
16125 if (mp->refcount > 0)
16126 offset += mp->fix_size;
16127 }
16128 }
16129
16130 /* Output the literal table. */
16131 static void
16132 dump_minipool (rtx_insn *scan)
16133 {
16134 Mnode * mp;
16135 Mnode * nmp;
16136 int align64 = 0;
16137
16138 if (ARM_DOUBLEWORD_ALIGN)
16139 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16140 if (mp->refcount > 0 && mp->fix_size >= 8)
16141 {
16142 align64 = 1;
16143 break;
16144 }
16145
16146 if (dump_file)
16147 fprintf (dump_file,
16148 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16149 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16150
16151 scan = emit_label_after (gen_label_rtx (), scan);
16152 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16153 scan = emit_label_after (minipool_vector_label, scan);
16154
16155 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16156 {
16157 if (mp->refcount > 0)
16158 {
16159 if (dump_file)
16160 {
16161 fprintf (dump_file,
16162 ";; Offset %u, min %ld, max %ld ",
16163 (unsigned) mp->offset, (unsigned long) mp->min_address,
16164 (unsigned long) mp->max_address);
16165 arm_print_value (dump_file, mp->value);
16166 fputc ('\n', dump_file);
16167 }
16168
16169 rtx val = copy_rtx (mp->value);
16170
16171 switch (GET_MODE_SIZE (mp->mode))
16172 {
16173 #ifdef HAVE_consttable_1
16174 case 1:
16175 scan = emit_insn_after (gen_consttable_1 (val), scan);
16176 break;
16177
16178 #endif
16179 #ifdef HAVE_consttable_2
16180 case 2:
16181 scan = emit_insn_after (gen_consttable_2 (val), scan);
16182 break;
16183
16184 #endif
16185 #ifdef HAVE_consttable_4
16186 case 4:
16187 scan = emit_insn_after (gen_consttable_4 (val), scan);
16188 break;
16189
16190 #endif
16191 #ifdef HAVE_consttable_8
16192 case 8:
16193 scan = emit_insn_after (gen_consttable_8 (val), scan);
16194 break;
16195
16196 #endif
16197 #ifdef HAVE_consttable_16
16198 case 16:
16199 scan = emit_insn_after (gen_consttable_16 (val), scan);
16200 break;
16201
16202 #endif
16203 default:
16204 gcc_unreachable ();
16205 }
16206 }
16207
16208 nmp = mp->next;
16209 free (mp);
16210 }
16211
16212 minipool_vector_head = minipool_vector_tail = NULL;
16213 scan = emit_insn_after (gen_consttable_end (), scan);
16214 scan = emit_barrier_after (scan);
16215 }
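
/* As a rough sketch, the literal table emitted above consists of a fresh
   label, an alignment insn (align_8 when any live 8-byte entry is present,
   otherwise align_4), the minipool_vector_label, and then one consttable_*
   entry per referenced constant, e.g. a single .word for a 4-byte entry.
   A consttable_end marker and a barrier close the table; the exact
   directives come from the consttable_* patterns in the machine
   description, so this is only an outline. */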
16216
16217 /* Return the cost of forcibly inserting a barrier after INSN. */
16218 static int
16219 arm_barrier_cost (rtx_insn *insn)
16220 {
16221 /* Basing the location of the pool on the loop depth is preferable,
16222 but at the moment, the basic block information seems to be
16223 corrupted by this stage of the compilation. */
16224 int base_cost = 50;
16225 rtx_insn *next = next_nonnote_insn (insn);
16226
16227 if (next != NULL && LABEL_P (next))
16228 base_cost -= 20;
16229
16230 switch (GET_CODE (insn))
16231 {
16232 case CODE_LABEL:
16233 /* It will always be better to place the table before the label, rather
16234 than after it. */
16235 return 50;
16236
16237 case INSN:
16238 case CALL_INSN:
16239 return base_cost;
16240
16241 case JUMP_INSN:
16242 return base_cost - 10;
16243
16244 default:
16245 return base_cost + 10;
16246 }
16247 }
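
/* For example, with the weights above a plain INSN that is not followed by
   a label costs 50, a JUMP_INSN costs 40, and a JUMP_INSN immediately
   followed by a CODE_LABEL costs 50 - 20 - 10 = 20, so the end of a block
   just before a label is the cheapest place to force a barrier. */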
16248
16249 /* Find the best place in the insn stream in the range
16250 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16251 Create the barrier by inserting a jump and add a new fix entry for
16252 it. */
16253 static Mfix *
16254 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16255 {
16256 HOST_WIDE_INT count = 0;
16257 rtx_barrier *barrier;
16258 rtx_insn *from = fix->insn;
16259 /* The instruction after which we will insert the jump. */
16260 rtx_insn *selected = NULL;
16261 int selected_cost;
16262 /* The address at which the jump instruction will be placed. */
16263 HOST_WIDE_INT selected_address;
16264 Mfix * new_fix;
16265 HOST_WIDE_INT max_count = max_address - fix->address;
16266 rtx_code_label *label = gen_label_rtx ();
16267
16268 selected_cost = arm_barrier_cost (from);
16269 selected_address = fix->address;
16270
16271 while (from && count < max_count)
16272 {
16273 rtx_jump_table_data *tmp;
16274 int new_cost;
16275
16276 /* This code shouldn't have been called if there was a natural barrier
16277 within range. */
16278 gcc_assert (!BARRIER_P (from));
16279
16280 /* Count the length of this insn. This must stay in sync with the
16281 code that pushes minipool fixes. */
16282 if (LABEL_P (from))
16283 count += get_label_padding (from);
16284 else
16285 count += get_attr_length (from);
16286
16287 /* If there is a jump table, add its length. */
16288 if (tablejump_p (from, NULL, &tmp))
16289 {
16290 count += get_jump_table_size (tmp);
16291
16292 /* Jump tables aren't in a basic block, so base the cost on
16293 the dispatch insn. If we select this location, we will
16294 still put the pool after the table. */
16295 new_cost = arm_barrier_cost (from);
16296
16297 if (count < max_count
16298 && (!selected || new_cost <= selected_cost))
16299 {
16300 selected = tmp;
16301 selected_cost = new_cost;
16302 selected_address = fix->address + count;
16303 }
16304
16305 /* Continue after the dispatch table. */
16306 from = NEXT_INSN (tmp);
16307 continue;
16308 }
16309
16310 new_cost = arm_barrier_cost (from);
16311
16312 if (count < max_count
16313 && (!selected || new_cost <= selected_cost))
16314 {
16315 selected = from;
16316 selected_cost = new_cost;
16317 selected_address = fix->address + count;
16318 }
16319
16320 from = NEXT_INSN (from);
16321 }
16322
16323 /* Make sure that we found a place to insert the jump. */
16324 gcc_assert (selected);
16325
16326 /* Make sure we do not split a call and its corresponding
16327 CALL_ARG_LOCATION note. */
16328 if (CALL_P (selected))
16329 {
16330 rtx_insn *next = NEXT_INSN (selected);
16331 if (next && NOTE_P (next)
16332 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16333 selected = next;
16334 }
16335
16336 /* Create a new JUMP_INSN that branches around a barrier. */
16337 from = emit_jump_insn_after (gen_jump (label), selected);
16338 JUMP_LABEL (from) = label;
16339 barrier = emit_barrier_after (from);
16340 emit_label_after (label, barrier);
16341
16342 /* Create a minipool barrier entry for the new barrier. */
16343 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16344 new_fix->insn = barrier;
16345 new_fix->address = selected_address;
16346 new_fix->next = fix->next;
16347 fix->next = new_fix;
16348
16349 return new_fix;
16350 }
16351
16352 /* Record that there is a natural barrier in the insn stream at
16353 ADDRESS. */
16354 static void
16355 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16356 {
16357 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16358
16359 fix->insn = insn;
16360 fix->address = address;
16361
16362 fix->next = NULL;
16363 if (minipool_fix_head != NULL)
16364 minipool_fix_tail->next = fix;
16365 else
16366 minipool_fix_head = fix;
16367
16368 minipool_fix_tail = fix;
16369 }
16370
16371 /* Record INSN, which will need fixing up to load a value from the
16372 minipool. ADDRESS is the offset of the insn from the start of the
16373 function; LOC is a pointer to the part of the insn which requires
16374 fixing; VALUE is the constant that must be loaded, which is of type
16375 MODE. */
16376 static void
16377 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16378 machine_mode mode, rtx value)
16379 {
16380 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16381
16382 fix->insn = insn;
16383 fix->address = address;
16384 fix->loc = loc;
16385 fix->mode = mode;
16386 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16387 fix->value = value;
16388 fix->forwards = get_attr_pool_range (insn);
16389 fix->backwards = get_attr_neg_pool_range (insn);
16390 fix->minipool = NULL;
16391
16392 /* If an insn doesn't have a range defined for it, then it isn't
16393 expecting to be reworked by this code. Better to stop now than
16394 to generate duff assembly code. */
16395 gcc_assert (fix->forwards || fix->backwards);
16396
16397 /* If an entry requires 8-byte alignment then assume all constant pools
16398 require 4 bytes of padding. Trying to do this later on a per-pool
16399 basis is awkward because existing pool entries have to be modified. */
16400 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16401 minipool_pad = 4;
16402
16403 if (dump_file)
16404 {
16405 fprintf (dump_file,
16406 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16407 GET_MODE_NAME (mode),
16408 INSN_UID (insn), (unsigned long) address,
16409 -1 * (long)fix->backwards, (long)fix->forwards);
16410 arm_print_value (dump_file, fix->value);
16411 fprintf (dump_file, "\n");
16412 }
16413
16414 /* Add it to the chain of fixes. */
16415 fix->next = NULL;
16416
16417 if (minipool_fix_head != NULL)
16418 minipool_fix_tail->next = fix;
16419 else
16420 minipool_fix_head = fix;
16421
16422 minipool_fix_tail = fix;
16423 }
16424
16425 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline.
16426 Returns the maximum number of insns permitted, or 99 if we always want to
16427 synthesize the value. */
16428 int
16429 arm_max_const_double_inline_cost ()
16430 {
16431 /* Let the value get synthesized to avoid the use of literal pools. */
16432 if (arm_disable_literal_pool)
16433 return 99;
16434
16435 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16436 }
16437
16438 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16439 Returns the number of insns needed, or 99 if we don't know how to
16440 do it. */
16441 int
16442 arm_const_double_inline_cost (rtx val)
16443 {
16444 rtx lowpart, highpart;
16445 machine_mode mode;
16446
16447 mode = GET_MODE (val);
16448
16449 if (mode == VOIDmode)
16450 mode = DImode;
16451
16452 gcc_assert (GET_MODE_SIZE (mode) == 8);
16453
16454 lowpart = gen_lowpart (SImode, val);
16455 highpart = gen_highpart_mode (SImode, mode, val);
16456
16457 gcc_assert (CONST_INT_P (lowpart));
16458 gcc_assert (CONST_INT_P (highpart));
16459
16460 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16461 NULL_RTX, NULL_RTX, 0, 0)
16462 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16463 NULL_RTX, NULL_RTX, 0, 0));
16464 }
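
/* As a worked example, for the DImode constant 0x0000000100000001 each
   32-bit half is 1, a valid immediate, so each half needs a single insn and
   arm_const_double_inline_cost returns 2.  That is below the limit of 3 (or
   4) from arm_max_const_double_inline_cost, so such a constant would
   normally be synthesized inline rather than placed in a literal pool. */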
16465
16466 /* Cost of loading a SImode constant. */
16467 static inline int
16468 arm_const_inline_cost (enum rtx_code code, rtx val)
16469 {
16470 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16471 NULL_RTX, NULL_RTX, 1, 0);
16472 }
16473
16474 /* Return true if it is worthwhile to split a 64-bit constant into two
16475 32-bit operations. This is the case if optimizing for size, or
16476 if we have load delay slots, or if one 32-bit part can be done with
16477 a single data operation. */
16478 bool
16479 arm_const_double_by_parts (rtx val)
16480 {
16481 machine_mode mode = GET_MODE (val);
16482 rtx part;
16483
16484 if (optimize_size || arm_ld_sched)
16485 return true;
16486
16487 if (mode == VOIDmode)
16488 mode = DImode;
16489
16490 part = gen_highpart_mode (SImode, mode, val);
16491
16492 gcc_assert (CONST_INT_P (part));
16493
16494 if (const_ok_for_arm (INTVAL (part))
16495 || const_ok_for_arm (~INTVAL (part)))
16496 return true;
16497
16498 part = gen_lowpart (SImode, val);
16499
16500 gcc_assert (CONST_INT_P (part));
16501
16502 if (const_ok_for_arm (INTVAL (part))
16503 || const_ok_for_arm (~INTVAL (part)))
16504 return true;
16505
16506 return false;
16507 }
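
/* For instance, the high part of 0xff00000000000000 is 0xff000000, which is
   a valid ARM immediate (0xff rotated right by 8 bits), so the function
   returns true and the constant is considered worth splitting into two
   32-bit operations. */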
16508
16509 /* Return true if it is possible to inline both the high and low parts
16510 of a 64-bit constant into 32-bit data processing instructions. */
16511 bool
16512 arm_const_double_by_immediates (rtx val)
16513 {
16514 machine_mode mode = GET_MODE (val);
16515 rtx part;
16516
16517 if (mode == VOIDmode)
16518 mode = DImode;
16519
16520 part = gen_highpart_mode (SImode, mode, val);
16521
16522 gcc_assert (CONST_INT_P (part));
16523
16524 if (!const_ok_for_arm (INTVAL (part)))
16525 return false;
16526
16527 part = gen_lowpart (SImode, val);
16528
16529 gcc_assert (CONST_INT_P (part));
16530
16531 if (!const_ok_for_arm (INTVAL (part)))
16532 return false;
16533
16534 return true;
16535 }
16536
16537 /* Scan INSN and note any of its operands that need fixing.
16538 If DO_PUSHES is false we do not actually push any of the fixups
16539 needed. */
16540 static void
16541 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16542 {
16543 int opno;
16544
16545 extract_constrain_insn (insn);
16546
16547 if (recog_data.n_alternatives == 0)
16548 return;
16549
16550 /* Fill in recog_op_alt with information about the constraints of
16551 this insn. */
16552 preprocess_constraints (insn);
16553
16554 const operand_alternative *op_alt = which_op_alt ();
16555 for (opno = 0; opno < recog_data.n_operands; opno++)
16556 {
16557 /* Things we need to fix can only occur in inputs. */
16558 if (recog_data.operand_type[opno] != OP_IN)
16559 continue;
16560
16561 /* If this alternative is a memory reference, then any mention
16562 of constants in this alternative is really to fool reload
16563 into allowing us to accept one there. We need to fix them up
16564 now so that we output the right code. */
16565 if (op_alt[opno].memory_ok)
16566 {
16567 rtx op = recog_data.operand[opno];
16568
16569 if (CONSTANT_P (op))
16570 {
16571 if (do_pushes)
16572 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16573 recog_data.operand_mode[opno], op);
16574 }
16575 else if (MEM_P (op)
16576 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16577 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16578 {
16579 if (do_pushes)
16580 {
16581 rtx cop = avoid_constant_pool_reference (op);
16582
16583 /* Casting the address of something to a mode narrower
16584 than a word can cause avoid_constant_pool_reference()
16585 to return the pool reference itself. That's no good to
16586 us here. Let's just hope that we can use the
16587 constant pool value directly. */
16588 if (op == cop)
16589 cop = get_pool_constant (XEXP (op, 0));
16590
16591 push_minipool_fix (insn, address,
16592 recog_data.operand_loc[opno],
16593 recog_data.operand_mode[opno], cop);
16594 }
16595
16596 }
16597 }
16598 }
16599
16600 return;
16601 }
16602
16603 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16604 and unions in the context of ARMv8-M Security Extensions. It is used as a
16605 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16606 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16607 or four masks, depending on whether it is being computed for a
16608 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16609 respectively. The tree for the type of the argument or a field within an
16610 argument is passed in ARG_TYPE, the current register this argument or field
16611 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16612 argument or field starts at is passed in STARTING_BIT and the last used bit
16613 is kept in LAST_USED_BIT which is also updated accordingly. */
16614
16615 static unsigned HOST_WIDE_INT
16616 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16617 uint32_t * padding_bits_to_clear,
16618 unsigned starting_bit, int * last_used_bit)
16619
16620 {
16621 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16622
16623 if (TREE_CODE (arg_type) == RECORD_TYPE)
16624 {
16625 unsigned current_bit = starting_bit;
16626 tree field;
16627 long int offset, size;
16628
16629
16630 field = TYPE_FIELDS (arg_type);
16631 while (field)
16632 {
16633 /* The offset within a structure is always an offset from
16634 the start of that structure. Make sure we take that into account
16635 in the calculation of the register-based offset that we use here. */
16636 offset = starting_bit;
16637 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16638 offset %= 32;
16639
16640 /* This is the actual size of the field; for bitfields this is the
16641 bitfield width and not the container size. */
16642 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16643
16644 if (*last_used_bit != offset)
16645 {
16646 if (offset < *last_used_bit)
16647 {
16648 /* This field's offset is before the 'last_used_bit'; that
16649 means this field goes in the next register. So we need to
16650 pad the rest of the current register and increase the
16651 register number. */
16652 uint32_t mask;
16653 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16654 mask++;
16655
16656 padding_bits_to_clear[*regno] |= mask;
16657 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16658 (*regno)++;
16659 }
16660 else
16661 {
16662 /* Otherwise we pad the bits between the last field's end and
16663 the start of the new field. */
16664 uint32_t mask;
16665
16666 mask = ((uint32_t)-1) >> (32 - offset);
16667 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16668 padding_bits_to_clear[*regno] |= mask;
16669 }
16670 current_bit = offset;
16671 }
16672
16673 /* Calculate further padding bits for inner structs/unions too. */
16674 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16675 {
16676 *last_used_bit = current_bit;
16677 not_to_clear_reg_mask
16678 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16679 padding_bits_to_clear, offset,
16680 last_used_bit);
16681 }
16682 else
16683 {
16684 /* Update 'current_bit' with this field's size. If the
16685 'current_bit' lies in a subsequent register, update 'regno' and
16686 reset 'current_bit' to point to the current bit in that new
16687 register. */
16688 current_bit += size;
16689 while (current_bit >= 32)
16690 {
16691 current_bit-=32;
16692 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16693 (*regno)++;
16694 }
16695 *last_used_bit = current_bit;
16696 }
16697
16698 field = TREE_CHAIN (field);
16699 }
16700 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16701 }
16702 else if (TREE_CODE (arg_type) == UNION_TYPE)
16703 {
16704 tree field, field_t;
16705 int i, regno_t, field_size;
16706 int max_reg = -1;
16707 int max_bit = -1;
16708 uint32_t mask;
16709 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16710 = {-1, -1, -1, -1};
16711
16712 /* To compute the padding bits in a union we only consider bits as
16713 padding bits if they are always either a padding bit or fall outside a
16714 field's size for all fields in the union. */
16715 field = TYPE_FIELDS (arg_type);
16716 while (field)
16717 {
16718 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16719 = {0U, 0U, 0U, 0U};
16720 int last_used_bit_t = *last_used_bit;
16721 regno_t = *regno;
16722 field_t = TREE_TYPE (field);
16723
16724 /* If the field's type is either a record or a union, make sure to
16725 compute its padding bits too. */
16726 if (RECORD_OR_UNION_TYPE_P (field_t))
16727 not_to_clear_reg_mask
16728 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16729 &padding_bits_to_clear_t[0],
16730 starting_bit, &last_used_bit_t);
16731 else
16732 {
16733 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16734 regno_t = (field_size / 32) + *regno;
16735 last_used_bit_t = (starting_bit + field_size) % 32;
16736 }
16737
16738 for (i = *regno; i < regno_t; i++)
16739 {
16740 /* For all but the last register used by this field only keep the
16741 padding bits that were padding bits in this field. */
16742 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16743 }
16744
16745 /* For the last register, keep all padding bits that were padding
16746 bits in this field and any padding bits that are still valid
16747 as padding bits but fall outside of this field's size. */
16748 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16749 padding_bits_to_clear_res[regno_t]
16750 &= padding_bits_to_clear_t[regno_t] | mask;
16751
16752 /* Update the maximum size of the fields in terms of registers used
16753 ('max_reg') and the 'last_used_bit' in said register. */
16754 if (max_reg < regno_t)
16755 {
16756 max_reg = regno_t;
16757 max_bit = last_used_bit_t;
16758 }
16759 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16760 max_bit = last_used_bit_t;
16761
16762 field = TREE_CHAIN (field);
16763 }
16764
16765 /* Update the current padding_bits_to_clear using the intersection of the
16766 padding bits of all the fields. */
16767 for (i=*regno; i < max_reg; i++)
16768 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16769
16770 /* Do not keep trailing padding bits; we do not know yet whether this
16771 is the end of the argument. */
16772 mask = ((uint32_t) 1 << max_bit) - 1;
16773 padding_bits_to_clear[max_reg]
16774 |= padding_bits_to_clear_res[max_reg] & mask;
16775
16776 *regno = max_reg;
16777 *last_used_bit = max_bit;
16778 }
16779 else
16780 /* This function should only be used for structs and unions. */
16781 gcc_unreachable ();
16782
16783 return not_to_clear_reg_mask;
16784 }
16785
16786 /* In the context of ARMv8-M Security Extensions, this function is used for both
16787 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16788 registers are used when returning or passing arguments, which is then
16789 returned as a mask. It will also compute a mask to indicate padding/unused
16790 bits for each of these registers, and passes this through the
16791 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16792 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16793 the starting register used to pass this argument or return value is passed
16794 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16795 for struct and union types. */
16796
16797 static unsigned HOST_WIDE_INT
16798 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16799 uint32_t * padding_bits_to_clear)
16800
16801 {
16802 int last_used_bit = 0;
16803 unsigned HOST_WIDE_INT not_to_clear_mask;
16804
16805 if (RECORD_OR_UNION_TYPE_P (arg_type))
16806 {
16807 not_to_clear_mask
16808 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16809 padding_bits_to_clear, 0,
16810 &last_used_bit);
16811
16812
16813 /* If the 'last_used_bit' is not zero, that means we are still using a
16814 part of the last 'regno'. In such cases we must clear the trailing
16815 bits. Otherwise we are not using regno and should mark it to be
16816 cleared. */
16817 if (last_used_bit != 0)
16818 padding_bits_to_clear[regno]
16819 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16820 else
16821 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16822 }
16823 else
16824 {
16825 not_to_clear_mask = 0;
16826 /* We are not dealing with structs or unions, so these arguments may be
16827 passed in floating point registers too. In some cases a BLKmode is
16828 used when returning or passing arguments in multiple VFP registers. */
16829 if (GET_MODE (arg_rtx) == BLKmode)
16830 {
16831 int i, arg_regs;
16832 rtx reg;
16833
16834 /* This should really only occur when dealing with the hard-float
16835 ABI. */
16836 gcc_assert (TARGET_HARD_FLOAT_ABI);
16837
16838 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16839 {
16840 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16841 gcc_assert (REG_P (reg));
16842
16843 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16844
16845 /* If we are dealing with DF mode, make sure we don't
16846 clear either of the registers it addresses. */
16847 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16848 if (arg_regs > 1)
16849 {
16850 unsigned HOST_WIDE_INT mask;
16851 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16852 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16853 not_to_clear_mask |= mask;
16854 }
16855 }
16856 }
16857 else
16858 {
16859 /* Otherwise we can rely on the MODE to determine how many registers
16860 are being used by this argument. */
16861 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16862 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16863 if (arg_regs > 1)
16864 {
16865 unsigned HOST_WIDE_INT
16866 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16867 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16868 not_to_clear_mask |= mask;
16869 }
16870 }
16871 }
16872
16873 return not_to_clear_mask;
16874 }
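
/* For a scalar argument the mask follows directly from its mode: an int in
   r0 contributes only bit 0, while a double passed in a VFP register pair
   under the hard-float ABI has ARM_NUM_REGS (DFmode) == 2 and therefore
   marks both registers of the pair as not to be cleared. */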
16875
16876 /* Saves the callee-saved registers, clears the callee-saved and caller-saved
16877 registers that are not used to pass arguments before a cmse_nonsecure_call,
16878 and restores the callee-saved registers afterwards. */
16879
16880 static void
16881 cmse_nonsecure_call_clear_caller_saved (void)
16882 {
16883 basic_block bb;
16884
16885 FOR_EACH_BB_FN (bb, cfun)
16886 {
16887 rtx_insn *insn;
16888
16889 FOR_BB_INSNS (bb, insn)
16890 {
16891 uint64_t to_clear_mask, float_mask;
16892 rtx_insn *seq;
16893 rtx pat, call, unspec, reg, cleared_reg, tmp;
16894 unsigned int regno, maxregno;
16895 rtx address;
16896 CUMULATIVE_ARGS args_so_far_v;
16897 cumulative_args_t args_so_far;
16898 tree arg_type, fntype;
16899 bool using_r4, first_param = true;
16900 function_args_iterator args_iter;
16901 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16902 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16903
16904 if (!NONDEBUG_INSN_P (insn))
16905 continue;
16906
16907 if (!CALL_P (insn))
16908 continue;
16909
16910 pat = PATTERN (insn);
16911 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16912 call = XVECEXP (pat, 0, 0);
16913
16914 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16915 if (GET_CODE (call) == SET)
16916 call = SET_SRC (call);
16917
16918 /* Check if it is a cmse_nonsecure_call. */
16919 unspec = XEXP (call, 0);
16920 if (GET_CODE (unspec) != UNSPEC
16921 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16922 continue;
16923
16924 /* Determine the caller-saved registers we need to clear. */
16925 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16926 maxregno = NUM_ARG_REGS - 1;
16927 /* Only look at the caller-saved floating point registers in case of
16928 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16929 lazy store and loads which clear both caller- and callee-saved
16930 registers. */
16931 if (TARGET_HARD_FLOAT_ABI)
16932 {
16933 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16934 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16935 to_clear_mask |= float_mask;
16936 maxregno = D7_VFP_REGNUM;
16937 }
16938
16939 /* Make sure the register used to hold the function address is not
16940 cleared. */
16941 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16942 gcc_assert (MEM_P (address));
16943 gcc_assert (REG_P (XEXP (address, 0)));
16944 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16945
16946 /* Set basic block of call insn so that df rescan is performed on
16947 insns inserted here. */
16948 set_block_for_insn (insn, bb);
16949 df_set_flags (DF_DEFER_INSN_RESCAN);
16950 start_sequence ();
16951
16952 /* Make sure the scheduler doesn't schedule other insns beyond
16953 here. */
16954 emit_insn (gen_blockage ());
16955
16956 /* Walk through all arguments and clear registers
16957 appropriately. */
16958 fntype = TREE_TYPE (MEM_EXPR (address));
16959 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16960 NULL_TREE);
16961 args_so_far = pack_cumulative_args (&args_so_far_v);
16962 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16963 {
16964 rtx arg_rtx;
16965 machine_mode arg_mode = TYPE_MODE (arg_type);
16966
16967 if (VOID_TYPE_P (arg_type))
16968 continue;
16969
16970 if (!first_param)
16971 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16972 true);
16973
16974 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16975 true);
16976 gcc_assert (REG_P (arg_rtx));
16977 to_clear_mask
16978 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16979 REGNO (arg_rtx),
16980 padding_bits_to_clear_ptr);
16981
16982 first_param = false;
16983 }
16984
16985 /* Clear padding bits where needed. */
16986 cleared_reg = XEXP (address, 0);
16987 reg = gen_rtx_REG (SImode, IP_REGNUM);
16988 using_r4 = false;
16989 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
16990 {
16991 if (padding_bits_to_clear[regno] == 0)
16992 continue;
16993
16994 /* If this is a Thumb-1 target, copy the address of the function
16995 we are calling from 'r4' into 'ip' so that we can use r4 to
16996 clear the unused bits in the arguments. */
16997 if (TARGET_THUMB1 && !using_r4)
16998 {
16999 using_r4 = true;
17000 reg = cleared_reg;
17001 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17002 reg);
17003 }
17004
17005 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17006 emit_move_insn (reg, tmp);
17007 /* Also fill the top half of the negated
17008 padding_bits_to_clear. */
17009 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17010 {
17011 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17012 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17013 GEN_INT (16),
17014 GEN_INT (16)),
17015 tmp));
17016 }
17017
17018 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17019 gen_rtx_REG (SImode, regno),
17020 reg));
17021
17022 }
17023 if (using_r4)
17024 emit_move_insn (cleared_reg,
17025 gen_rtx_REG (SImode, IP_REGNUM));
17026
17027 /* We use right shift and left shift to clear the LSB of the address
17028 we jump to instead of using bic, to avoid having to use an extra
17029 register on Thumb-1. */
17030 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17031 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17032 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17033 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17034
17035 /* Clear all registers that could leak information before doing a
17036 non-secure call. */
17037 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17038 {
17039 if (!(to_clear_mask & (1LL << regno)))
17040 continue;
17041
17042 /* If regno is an even vfp register and its successor is also to
17043 be cleared, use vmov. */
17044 if (IS_VFP_REGNUM (regno))
17045 {
17046 if (TARGET_VFP_DOUBLE
17047 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17048 && to_clear_mask & (1LL << (regno + 1)))
17049 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17050 CONST0_RTX (DFmode));
17051 else
17052 emit_move_insn (gen_rtx_REG (SFmode, regno),
17053 CONST0_RTX (SFmode));
17054 }
17055 else
17056 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17057 }
17058
17059 seq = get_insns ();
17060 end_sequence ();
17061 emit_insn_before (seq, insn);
17062
17063 }
17064 }
17065 }
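
/* Schematically, for a call through r4 that takes a single int argument in
   r0 on a soft-float target, the sequence inserted before the non-secure
   call looks something like

	lsrs	r4, r4, #1	@ clear the LSB of the target address
	lsls	r4, r4, #1
	movs	r1, r4		@ "clear" the unused argument registers by
	movs	r2, r4		@ loading the (non-secret) target address
	movs	r3, r4

   with the call itself following.  The details vary with the architecture
   and float ABI, so this is only an outline. */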
17066
17067 /* Rewrite move insn into subtract of 0 if the condition codes will
17068 be useful in the next conditional jump insn. */
17069
17070 static void
17071 thumb1_reorg (void)
17072 {
17073 basic_block bb;
17074
17075 FOR_EACH_BB_FN (bb, cfun)
17076 {
17077 rtx dest, src;
17078 rtx cmp, op0, op1, set = NULL;
17079 rtx_insn *prev, *insn = BB_END (bb);
17080 bool insn_clobbered = false;
17081
17082 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17083 insn = PREV_INSN (insn);
17084
17085 /* Find the last cbranchsi4_insn in basic block BB. */
17086 if (insn == BB_HEAD (bb)
17087 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17088 continue;
17089
17090 /* Get the register with which we are comparing. */
17091 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17092 op0 = XEXP (cmp, 0);
17093 op1 = XEXP (cmp, 1);
17094
17095 /* Check that comparison is against ZERO. */
17096 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17097 continue;
17098
17099 /* Find the first flag setting insn before INSN in basic block BB. */
17100 gcc_assert (insn != BB_HEAD (bb));
17101 for (prev = PREV_INSN (insn);
17102 (!insn_clobbered
17103 && prev != BB_HEAD (bb)
17104 && (NOTE_P (prev)
17105 || DEBUG_INSN_P (prev)
17106 || ((set = single_set (prev)) != NULL
17107 && get_attr_conds (prev) == CONDS_NOCOND)));
17108 prev = PREV_INSN (prev))
17109 {
17110 if (reg_set_p (op0, prev))
17111 insn_clobbered = true;
17112 }
17113
17114 /* Skip if op0 is clobbered by an insn other than prev. */
17115 if (insn_clobbered)
17116 continue;
17117
17118 if (!set)
17119 continue;
17120
17121 dest = SET_DEST (set);
17122 src = SET_SRC (set);
17123 if (!low_register_operand (dest, SImode)
17124 || !low_register_operand (src, SImode))
17125 continue;
17126
17127 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17128 in INSN. Both src and dest of the move insn are checked. */
17129 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17130 {
17131 dest = copy_rtx (dest);
17132 src = copy_rtx (src);
17133 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17134 PATTERN (prev) = gen_rtx_SET (dest, src);
17135 INSN_CODE (prev) = -1;
17136 /* Set test register in INSN to dest. */
17137 XEXP (cmp, 0) = copy_rtx (dest);
17138 INSN_CODE (insn) = -1;
17139 }
17140 }
17141 }
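
/* Roughly, the transformation turns

	movs	r1, r0
	...
	cmp	r0, #0
	beq	.L1

   into

	subs	r1, r0, #0
	...
	beq	.L1

   because the flag-setting subtract already leaves the condition codes the
   branch needs, so the separate compare against zero can disappear when the
   cbranch pattern is eventually split. */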
17142
17143 /* Convert instructions to their cc-clobbering variant if possible, since
17144 that allows us to use smaller encodings. */
17145
17146 static void
17147 thumb2_reorg (void)
17148 {
17149 basic_block bb;
17150 regset_head live;
17151
17152 INIT_REG_SET (&live);
17153
17154 /* We are freeing block_for_insn in the toplev to keep compatibility
17155 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17156 compute_bb_for_insn ();
17157 df_analyze ();
17158
17159 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17160
17161 FOR_EACH_BB_FN (bb, cfun)
17162 {
17163 if ((current_tune->disparage_flag_setting_t16_encodings
17164 == tune_params::DISPARAGE_FLAGS_ALL)
17165 && optimize_bb_for_speed_p (bb))
17166 continue;
17167
17168 rtx_insn *insn;
17169 Convert_Action action = SKIP;
17170 Convert_Action action_for_partial_flag_setting
17171 = ((current_tune->disparage_flag_setting_t16_encodings
17172 != tune_params::DISPARAGE_FLAGS_NEITHER)
17173 && optimize_bb_for_speed_p (bb))
17174 ? SKIP : CONV;
17175
17176 COPY_REG_SET (&live, DF_LR_OUT (bb));
17177 df_simulate_initialize_backwards (bb, &live);
17178 FOR_BB_INSNS_REVERSE (bb, insn)
17179 {
17180 if (NONJUMP_INSN_P (insn)
17181 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17182 && GET_CODE (PATTERN (insn)) == SET)
17183 {
17184 action = SKIP;
17185 rtx pat = PATTERN (insn);
17186 rtx dst = XEXP (pat, 0);
17187 rtx src = XEXP (pat, 1);
17188 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17189
17190 if (UNARY_P (src) || BINARY_P (src))
17191 op0 = XEXP (src, 0);
17192
17193 if (BINARY_P (src))
17194 op1 = XEXP (src, 1);
17195
17196 if (low_register_operand (dst, SImode))
17197 {
17198 switch (GET_CODE (src))
17199 {
17200 case PLUS:
17201 /* Adding two registers and storing the result
17202 in the first source is already a 16-bit
17203 operation. */
17204 if (rtx_equal_p (dst, op0)
17205 && register_operand (op1, SImode))
17206 break;
17207
17208 if (low_register_operand (op0, SImode))
17209 {
17210 /* ADDS <Rd>,<Rn>,<Rm> */
17211 if (low_register_operand (op1, SImode))
17212 action = CONV;
17213 /* ADDS <Rdn>,#<imm8> */
17214 /* SUBS <Rdn>,#<imm8> */
17215 else if (rtx_equal_p (dst, op0)
17216 && CONST_INT_P (op1)
17217 && IN_RANGE (INTVAL (op1), -255, 255))
17218 action = CONV;
17219 /* ADDS <Rd>,<Rn>,#<imm3> */
17220 /* SUBS <Rd>,<Rn>,#<imm3> */
17221 else if (CONST_INT_P (op1)
17222 && IN_RANGE (INTVAL (op1), -7, 7))
17223 action = CONV;
17224 }
17225 /* ADCS <Rd>, <Rn> */
17226 else if (GET_CODE (XEXP (src, 0)) == PLUS
17227 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17228 && low_register_operand (XEXP (XEXP (src, 0), 1),
17229 SImode)
17230 && COMPARISON_P (op1)
17231 && cc_register (XEXP (op1, 0), VOIDmode)
17232 && maybe_get_arm_condition_code (op1) == ARM_CS
17233 && XEXP (op1, 1) == const0_rtx)
17234 action = CONV;
17235 break;
17236
17237 case MINUS:
17238 /* RSBS <Rd>,<Rn>,#0
17239 Not handled here: see NEG below. */
17240 /* SUBS <Rd>,<Rn>,#<imm3>
17241 SUBS <Rdn>,#<imm8>
17242 Not handled here: see PLUS above. */
17243 /* SUBS <Rd>,<Rn>,<Rm> */
17244 if (low_register_operand (op0, SImode)
17245 && low_register_operand (op1, SImode))
17246 action = CONV;
17247 break;
17248
17249 case MULT:
17250 /* MULS <Rdm>,<Rn>,<Rdm>
17251 As an exception to the rule, this is only used
17252 when optimizing for size since MULS is slow on all
17253 known implementations. We do not even want to use
17254 MULS in cold code, if optimizing for speed, so we
17255 test the global flag here. */
17256 if (!optimize_size)
17257 break;
17258 /* Fall through. */
17259 case AND:
17260 case IOR:
17261 case XOR:
17262 /* ANDS <Rdn>,<Rm> */
17263 if (rtx_equal_p (dst, op0)
17264 && low_register_operand (op1, SImode))
17265 action = action_for_partial_flag_setting;
17266 else if (rtx_equal_p (dst, op1)
17267 && low_register_operand (op0, SImode))
17268 action = action_for_partial_flag_setting == SKIP
17269 ? SKIP : SWAP_CONV;
17270 break;
17271
17272 case ASHIFTRT:
17273 case ASHIFT:
17274 case LSHIFTRT:
17275 /* ASRS <Rdn>,<Rm> */
17276 /* LSRS <Rdn>,<Rm> */
17277 /* LSLS <Rdn>,<Rm> */
17278 if (rtx_equal_p (dst, op0)
17279 && low_register_operand (op1, SImode))
17280 action = action_for_partial_flag_setting;
17281 /* ASRS <Rd>,<Rm>,#<imm5> */
17282 /* LSRS <Rd>,<Rm>,#<imm5> */
17283 /* LSLS <Rd>,<Rm>,#<imm5> */
17284 else if (low_register_operand (op0, SImode)
17285 && CONST_INT_P (op1)
17286 && IN_RANGE (INTVAL (op1), 0, 31))
17287 action = action_for_partial_flag_setting;
17288 break;
17289
17290 case ROTATERT:
17291 /* RORS <Rdn>,<Rm> */
17292 if (rtx_equal_p (dst, op0)
17293 && low_register_operand (op1, SImode))
17294 action = action_for_partial_flag_setting;
17295 break;
17296
17297 case NOT:
17298 /* MVNS <Rd>,<Rm> */
17299 if (low_register_operand (op0, SImode))
17300 action = action_for_partial_flag_setting;
17301 break;
17302
17303 case NEG:
17304 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17305 if (low_register_operand (op0, SImode))
17306 action = CONV;
17307 break;
17308
17309 case CONST_INT:
17310 /* MOVS <Rd>,#<imm8> */
17311 if (CONST_INT_P (src)
17312 && IN_RANGE (INTVAL (src), 0, 255))
17313 action = action_for_partial_flag_setting;
17314 break;
17315
17316 case REG:
17317 /* MOVS and MOV<c> with registers have different
17318 encodings, so are not relevant here. */
17319 break;
17320
17321 default:
17322 break;
17323 }
17324 }
17325
17326 if (action != SKIP)
17327 {
17328 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17329 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17330 rtvec vec;
17331
17332 if (action == SWAP_CONV)
17333 {
17334 src = copy_rtx (src);
17335 XEXP (src, 0) = op1;
17336 XEXP (src, 1) = op0;
17337 pat = gen_rtx_SET (dst, src);
17338 vec = gen_rtvec (2, pat, clobber);
17339 }
17340 else /* action == CONV */
17341 vec = gen_rtvec (2, pat, clobber);
17342
17343 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17344 INSN_CODE (insn) = -1;
17345 }
17346 }
17347
17348 if (NONDEBUG_INSN_P (insn))
17349 df_simulate_one_insn_backwards (bb, insn, &live);
17350 }
17351 }
17352
17353 CLEAR_REG_SET (&live);
17354 }
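
/* For instance, a 32-bit Thumb-2 'and r0, r0, r1' can become the 16-bit
   flag-setting 'ands r0, r1' whenever the condition codes are dead at that
   point; the CLOBBER of CC_REGNUM added above is what licenses the shorter
   encoding when the insn is re-recognized. */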
17355
17356 /* GCC puts the pool in the wrong place for ARM, since we can only
17357 load addresses a limited distance around the pc. We do some
17358 special munging to move the constant pool values to the correct
17359 point in the code. */
17360 static void
17361 arm_reorg (void)
17362 {
17363 rtx_insn *insn;
17364 HOST_WIDE_INT address = 0;
17365 Mfix * fix;
17366
17367 if (use_cmse)
17368 cmse_nonsecure_call_clear_caller_saved ();
17369 if (TARGET_THUMB1)
17370 thumb1_reorg ();
17371 else if (TARGET_THUMB2)
17372 thumb2_reorg ();
17373
17374 /* Ensure all insns that must be split have been split at this point.
17375 Otherwise, the pool placement code below may compute incorrect
17376 insn lengths. Note that when optimizing, all insns have already
17377 been split at this point. */
17378 if (!optimize)
17379 split_all_insns_noflow ();
17380
17381 minipool_fix_head = minipool_fix_tail = NULL;
17382
17383 /* The first insn must always be a note, or the code below won't
17384 scan it properly. */
17385 insn = get_insns ();
17386 gcc_assert (NOTE_P (insn));
17387 minipool_pad = 0;
17388
17389 /* Scan all the insns and record the operands that will need fixing. */
17390 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17391 {
17392 if (BARRIER_P (insn))
17393 push_minipool_barrier (insn, address);
17394 else if (INSN_P (insn))
17395 {
17396 rtx_jump_table_data *table;
17397
17398 note_invalid_constants (insn, address, true);
17399 address += get_attr_length (insn);
17400
17401 /* If the insn is a vector jump, add the size of the table
17402 and skip the table. */
17403 if (tablejump_p (insn, NULL, &table))
17404 {
17405 address += get_jump_table_size (table);
17406 insn = table;
17407 }
17408 }
17409 else if (LABEL_P (insn))
17410 /* Add the worst-case padding due to alignment. We don't add
17411 the _current_ padding because the minipool insertions
17412 themselves might change it. */
17413 address += get_label_padding (insn);
17414 }
17415
17416 fix = minipool_fix_head;
17417
17418 /* Now scan the fixups and perform the required changes. */
17419 while (fix)
17420 {
17421 Mfix * ftmp;
17422 Mfix * fdel;
17423 Mfix * last_added_fix;
17424 Mfix * last_barrier = NULL;
17425 Mfix * this_fix;
17426
17427 /* Skip any further barriers before the next fix. */
17428 while (fix && BARRIER_P (fix->insn))
17429 fix = fix->next;
17430
17431 /* No more fixes. */
17432 if (fix == NULL)
17433 break;
17434
17435 last_added_fix = NULL;
17436
17437 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17438 {
17439 if (BARRIER_P (ftmp->insn))
17440 {
17441 if (ftmp->address >= minipool_vector_head->max_address)
17442 break;
17443
17444 last_barrier = ftmp;
17445 }
17446 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17447 break;
17448
17449 last_added_fix = ftmp; /* Keep track of the last fix added. */
17450 }
17451
17452 /* If we found a barrier, drop back to that; any fixes that we
17453 could have reached but come after the barrier will now go in
17454 the next mini-pool. */
17455 if (last_barrier != NULL)
17456 {
17457 /* Reduce the refcount for those fixes that won't go into this
17458 pool after all. */
17459 for (fdel = last_barrier->next;
17460 fdel && fdel != ftmp;
17461 fdel = fdel->next)
17462 {
17463 fdel->minipool->refcount--;
17464 fdel->minipool = NULL;
17465 }
17466
17467 ftmp = last_barrier;
17468 }
17469 else
17470 {
17471 /* ftmp is the first fix that we can't fit into this pool and
17472 there are no natural barriers that we could use. Insert a
17473 new barrier in the code somewhere between the previous
17474 fix and this one, and arrange to jump around it. */
17475 HOST_WIDE_INT max_address;
17476
17477 /* The last item on the list of fixes must be a barrier, so
17478 we can never run off the end of the list of fixes without
17479 last_barrier being set. */
17480 gcc_assert (ftmp);
17481
17482 max_address = minipool_vector_head->max_address;
17483 /* Check that there isn't another fix that is in range that
17484 we couldn't fit into this pool because the pool was
17485 already too large: we need to put the pool before such an
17486 instruction. The pool itself may come just after the
17487 fix because create_fix_barrier also allows space for a
17488 jump instruction. */
17489 if (ftmp->address < max_address)
17490 max_address = ftmp->address + 1;
17491
17492 last_barrier = create_fix_barrier (last_added_fix, max_address);
17493 }
17494
17495 assign_minipool_offsets (last_barrier);
17496
17497 while (ftmp)
17498 {
17499 if (!BARRIER_P (ftmp->insn)
17500 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17501 == NULL))
17502 break;
17503
17504 ftmp = ftmp->next;
17505 }
17506
17507 /* Scan over the fixes we have identified for this pool, fixing them
17508 up and adding the constants to the pool itself. */
17509 for (this_fix = fix; this_fix && ftmp != this_fix;
17510 this_fix = this_fix->next)
17511 if (!BARRIER_P (this_fix->insn))
17512 {
17513 rtx addr
17514 = plus_constant (Pmode,
17515 gen_rtx_LABEL_REF (VOIDmode,
17516 minipool_vector_label),
17517 this_fix->minipool->offset);
17518 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17519 }
17520
17521 dump_minipool (last_barrier->insn);
17522 fix = ftmp;
17523 }
17524
17525 /* From now on we must synthesize any constants that we can't handle
17526 directly. This can happen if the RTL gets split during final
17527 instruction generation. */
17528 cfun->machine->after_arm_reorg = 1;
17529
17530 /* Free the minipool memory. */
17531 obstack_free (&minipool_obstack, minipool_startobj);
17532 }
17533 \f
17534 /* Routines to output assembly language. */
17535
17536 /* Return the string representation of the real value passed in. */
17537 static const char *
17538 fp_const_from_val (REAL_VALUE_TYPE *r)
17539 {
17540 if (!fp_consts_inited)
17541 init_fp_table ();
17542
17543 gcc_assert (real_equal (r, &value_fp0));
17544 return "0";
17545 }
17546
17547 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17548 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17549 is in the list, and UPDATE is true iff the list contains an explicit
17550 update of the base register. */
17551 void
17552 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17553 bool update)
17554 {
17555 int i;
17556 char pattern[100];
17557 int offset;
17558 const char *conditional;
17559 int num_saves = XVECLEN (operands[0], 0);
17560 unsigned int regno;
17561 unsigned int regno_base = REGNO (operands[1]);
17562 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17563
17564 offset = 0;
17565 offset += update ? 1 : 0;
17566 offset += return_pc ? 1 : 0;
17567
17568 /* Is the base register in the list? */
17569 for (i = offset; i < num_saves; i++)
17570 {
17571 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17572 /* If SP is in the list, then the base register must be SP. */
17573 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17574 /* If base register is in the list, there must be no explicit update. */
17575 if (regno == regno_base)
17576 gcc_assert (!update);
17577 }
17578
17579 conditional = reverse ? "%?%D0" : "%?%d0";
17580 /* Can't use POP if returning from an interrupt. */
17581 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17582 sprintf (pattern, "pop%s\t{", conditional);
17583 else
17584 {
17585 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17586 It's just a convention; their semantics are identical. */
17587 if (regno_base == SP_REGNUM)
17588 sprintf (pattern, "ldmfd%s\t", conditional);
17589 else if (update)
17590 sprintf (pattern, "ldmia%s\t", conditional);
17591 else
17592 sprintf (pattern, "ldm%s\t", conditional);
17593
17594 strcat (pattern, reg_names[regno_base]);
17595 if (update)
17596 strcat (pattern, "!, {");
17597 else
17598 strcat (pattern, ", {");
17599 }
17600
17601 /* Output the first destination register. */
17602 strcat (pattern,
17603 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17604
17605 /* Output the rest of the destination registers. */
17606 for (i = offset + 1; i < num_saves; i++)
17607 {
17608 strcat (pattern, ", ");
17609 strcat (pattern,
17610 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17611 }
17612
17613 strcat (pattern, "}");
17614
17615 if (interrupt_p && return_pc)
17616 strcat (pattern, "^");
17617
17618 output_asm_insn (pattern, &cond);
17619 }
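
/* For example, popping {r4, r5, pc} with SP as the base register and an
   explicit update produces "pop {r4, r5, pc}", while the same register list
   with a non-SP base and no update comes out as "ldm r3, {r4, r5, pc}" (r3
   here standing for whatever base register the pattern supplies).  When
   returning from an interrupt handler a "^" is appended so that the CPSR is
   restored from the SPSR. */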
17620
17621
17622 /* Output the assembly for a store multiple. */
17623
17624 const char *
17625 vfp_output_vstmd (rtx * operands)
17626 {
17627 char pattern[100];
17628 int p;
17629 int base;
17630 int i;
17631 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17632 ? XEXP (operands[0], 0)
17633 : XEXP (XEXP (operands[0], 0), 0);
17634 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17635
17636 if (push_p)
17637 strcpy (pattern, "vpush%?.64\t{%P1");
17638 else
17639 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17640
17641 p = strlen (pattern);
17642
17643 gcc_assert (REG_P (operands[1]));
17644
17645 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17646 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17647 {
17648 p += sprintf (&pattern[p], ", d%d", base + i);
17649 }
17650 strcpy (&pattern[p], "}");
17651
17652 output_asm_insn (pattern, operands);
17653 return "";
17654 }
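
/* For example, storing the pair d8-d9 with SP as the base register produces
   "vpush.64 {d8, d9}", whereas a writeback store through some other base
   register comes out as something like "vstmdb.64 r3!, {d8, d9}" (the base
   register shown is illustrative). */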
17655
17656
17657 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17658 number of bytes pushed. */
17659
17660 static int
17661 vfp_emit_fstmd (int base_reg, int count)
17662 {
17663 rtx par;
17664 rtx dwarf;
17665 rtx tmp, reg;
17666 int i;
17667
17668 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17669 register pairs are stored by a store multiple insn. We avoid this
17670 by pushing an extra pair. */
17671 if (count == 2 && !arm_arch6)
17672 {
17673 if (base_reg == LAST_VFP_REGNUM - 3)
17674 base_reg -= 2;
17675 count++;
17676 }
17677
17678 /* FSTMD may not store more than 16 doubleword registers at once. Split
17679 larger stores into multiple parts (up to a maximum of two, in
17680 practice). */
17681 if (count > 16)
17682 {
17683 int saved;
17684 /* NOTE: base_reg is an internal register number, so each D register
17685 counts as 2. */
17686 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17687 saved += vfp_emit_fstmd (base_reg, 16);
17688 return saved;
17689 }
17690
17691 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17692 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17693
17694 reg = gen_rtx_REG (DFmode, base_reg);
17695 base_reg += 2;
17696
17697 XVECEXP (par, 0, 0)
17698 = gen_rtx_SET (gen_frame_mem
17699 (BLKmode,
17700 gen_rtx_PRE_MODIFY (Pmode,
17701 stack_pointer_rtx,
17702 plus_constant
17703 (Pmode, stack_pointer_rtx,
17704 - (count * 8)))
17705 ),
17706 gen_rtx_UNSPEC (BLKmode,
17707 gen_rtvec (1, reg),
17708 UNSPEC_PUSH_MULT));
17709
17710 tmp = gen_rtx_SET (stack_pointer_rtx,
17711 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17712 RTX_FRAME_RELATED_P (tmp) = 1;
17713 XVECEXP (dwarf, 0, 0) = tmp;
17714
17715 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17716 RTX_FRAME_RELATED_P (tmp) = 1;
17717 XVECEXP (dwarf, 0, 1) = tmp;
17718
17719 for (i = 1; i < count; i++)
17720 {
17721 reg = gen_rtx_REG (DFmode, base_reg);
17722 base_reg += 2;
17723 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17724
17725 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17726 plus_constant (Pmode,
17727 stack_pointer_rtx,
17728 i * 8)),
17729 reg);
17730 RTX_FRAME_RELATED_P (tmp) = 1;
17731 XVECEXP (dwarf, 0, i + 1) = tmp;
17732 }
17733
17734 par = emit_insn (par);
17735 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17736 RTX_FRAME_RELATED_P (par) = 1;
17737
17738 return count * 8;
17739 }
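
/* A usage note: the function adjusts the stack pointer by count * 8 bytes
   and returns that byte count, so pushing three D registers returns 24.
   When !arm_arch6, a request to push exactly two D registers is silently
   widened to three in order to avoid the ARM10 VFPr1 erratum described
   above. */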
17740
17741 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17742 has the cmse_nonsecure_call attribute; return false otherwise. */
17743
17744 bool
17745 detect_cmse_nonsecure_call (tree addr)
17746 {
17747 if (!addr)
17748 return FALSE;
17749
17750 tree fntype = TREE_TYPE (addr);
17751 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17752 TYPE_ATTRIBUTES (fntype)))
17753 return TRUE;
17754 return FALSE;
17755 }
17756
17757
17758 /* Emit a call instruction with pattern PAT. ADDR is the address of
17759 the call target. */
17760
17761 void
17762 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17763 {
17764 rtx insn;
17765
17766 insn = emit_call_insn (pat);
17767
17768 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17769 If the call might use such an entry, add a use of the PIC register
17770 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17771 if (TARGET_VXWORKS_RTP
17772 && flag_pic
17773 && !sibcall
17774 && GET_CODE (addr) == SYMBOL_REF
17775 && (SYMBOL_REF_DECL (addr)
17776 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17777 : !SYMBOL_REF_LOCAL_P (addr)))
17778 {
17779 require_pic_register ();
17780 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17781 }
17782
17783 if (TARGET_AAPCS_BASED)
17784 {
17785 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17786 linker. We need to add an IP clobber to allow setting
17787 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17788 is not needed since it's a fixed register. */
17789 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17790 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17791 }
17792 }
17793
17794 /* Output a 'call' insn. */
17795 const char *
17796 output_call (rtx *operands)
17797 {
17798 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17799
17800 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17801 if (REGNO (operands[0]) == LR_REGNUM)
17802 {
17803 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17804 output_asm_insn ("mov%?\t%0, %|lr", operands);
17805 }
17806
17807 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17808
17809 if (TARGET_INTERWORK || arm_arch4t)
17810 output_asm_insn ("bx%?\t%0", operands);
17811 else
17812 output_asm_insn ("mov%?\t%|pc, %0", operands);
17813
17814 return "";
17815 }
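
/* On such a pre-ARMv5 target the emitted sequence is therefore roughly

	mov	lr, pc
	bx	r0		@ or "mov pc, r0" if neither interworking
				@ nor ARMv4T is available

   with an extra "mov ip, lr" shuffle only when the call target happens to
   be in lr itself (r0 above is illustrative). */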
17816
17817 /* Output a move from ARM registers to ARM registers of a long double.
17818 OPERANDS[0] is the destination.
17819 OPERANDS[1] is the source. */
17820 const char *
17821 output_mov_long_double_arm_from_arm (rtx *operands)
17822 {
17823 /* We have to be careful here because the two might overlap. */
17824 int dest_start = REGNO (operands[0]);
17825 int src_start = REGNO (operands[1]);
17826 rtx ops[2];
17827 int i;
17828
17829 if (dest_start < src_start)
17830 {
17831 for (i = 0; i < 3; i++)
17832 {
17833 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17834 ops[1] = gen_rtx_REG (SImode, src_start + i);
17835 output_asm_insn ("mov%?\t%0, %1", ops);
17836 }
17837 }
17838 else
17839 {
17840 for (i = 2; i >= 0; i--)
17841 {
17842 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17843 ops[1] = gen_rtx_REG (SImode, src_start + i);
17844 output_asm_insn ("mov%?\t%0, %1", ops);
17845 }
17846 }
17847
17848 return "";
17849 }
17850
17851 void
17852 arm_emit_movpair (rtx dest, rtx src)
17853 {
17854 /* If the src is an immediate, simplify it. */
17855 if (CONST_INT_P (src))
17856 {
17857 HOST_WIDE_INT val = INTVAL (src);
17858 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17859 if ((val >> 16) & 0x0000ffff)
17860 {
17861 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17862 GEN_INT (16)),
17863 GEN_INT ((val >> 16) & 0x0000ffff));
17864 rtx_insn *insn = get_last_insn ();
17865 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17866 }
17867 return;
17868 }
17869 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17870 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17871 rtx_insn *insn = get_last_insn ();
17872 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17873 }
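
/* For an immediate such as 0x12345678 this expands to a movw/movt style
   pair: the low half is set first and the high half is then inserted with a
   16-bit zero_extract, giving roughly

	movw	r0, #0x5678
	movt	r0, #0x1234

   while a symbolic source instead uses the HIGH/LO_SUM pair emitted at the
   end of the function (r0 above is illustrative). */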
17874
17875 /* Output a move between double words. It must be REG<-MEM
17876 or MEM<-REG. */
17877 const char *
17878 output_move_double (rtx *operands, bool emit, int *count)
17879 {
17880 enum rtx_code code0 = GET_CODE (operands[0]);
17881 enum rtx_code code1 = GET_CODE (operands[1]);
17882 rtx otherops[3];
17883 if (count)
17884 *count = 1;
17885
17886 /* The only case when this might happen is when
17887 you are looking at the length of a DImode instruction
17888 that has an invalid constant in it. */
17889 if (code0 == REG && code1 != MEM)
17890 {
17891 gcc_assert (!emit);
17892 *count = 2;
17893 return "";
17894 }
17895
17896 if (code0 == REG)
17897 {
17898 unsigned int reg0 = REGNO (operands[0]);
17899
17900 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17901
17902 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17903
17904 switch (GET_CODE (XEXP (operands[1], 0)))
17905 {
17906 case REG:
17907
17908 if (emit)
17909 {
17910 if (TARGET_LDRD
17911 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17912 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17913 else
17914 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17915 }
17916 break;
17917
17918 case PRE_INC:
17919 gcc_assert (TARGET_LDRD);
17920 if (emit)
17921 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17922 break;
17923
17924 case PRE_DEC:
17925 if (emit)
17926 {
17927 if (TARGET_LDRD)
17928 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17929 else
17930 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17931 }
17932 break;
17933
17934 case POST_INC:
17935 if (emit)
17936 {
17937 if (TARGET_LDRD)
17938 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17939 else
17940 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17941 }
17942 break;
17943
17944 case POST_DEC:
17945 gcc_assert (TARGET_LDRD);
17946 if (emit)
17947 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17948 break;
17949
17950 case PRE_MODIFY:
17951 case POST_MODIFY:
17952 /* Autoincrement addressing modes should never have overlapping
17953 base and destination registers, and overlapping index registers
17954 are already prohibited, so this doesn't need to worry about
17955 fix_cm3_ldrd. */
17956 otherops[0] = operands[0];
17957 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17958 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17959
17960 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17961 {
17962 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17963 {
17964 /* Registers overlap so split out the increment. */
17965 if (emit)
17966 {
17967 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17968 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17969 }
17970 if (count)
17971 *count = 2;
17972 }
17973 else
17974 {
17975 /* Use a single insn if we can.
17976 FIXME: IWMMXT allows offsets larger than ldrd can
17977 handle, fix these up with a pair of ldr. */
17978 if (TARGET_THUMB2
17979 || !CONST_INT_P (otherops[2])
17980 || (INTVAL (otherops[2]) > -256
17981 && INTVAL (otherops[2]) < 256))
17982 {
17983 if (emit)
17984 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17985 }
17986 else
17987 {
17988 if (emit)
17989 {
17990 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17991 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17992 }
17993 if (count)
17994 *count = 2;
17995
17996 }
17997 }
17998 }
17999 else
18000 {
18001 /* Use a single insn if we can.
18002 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18003 fix these up with a pair of ldr. */
18004 if (TARGET_THUMB2
18005 || !CONST_INT_P (otherops[2])
18006 || (INTVAL (otherops[2]) > -256
18007 && INTVAL (otherops[2]) < 256))
18008 {
18009 if (emit)
18010 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18011 }
18012 else
18013 {
18014 if (emit)
18015 {
18016 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18017 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18018 }
18019 if (count)
18020 *count = 2;
18021 }
18022 }
18023 break;
18024
18025 case LABEL_REF:
18026 case CONST:
18027 /* We might be able to use ldrd %0, %1 here. However the range is
18028 different to ldr/adr, and it is broken on some ARMv7-M
18029 implementations. */
18030 /* Use the second register of the pair to avoid problematic
18031 overlap. */
18032 otherops[1] = operands[1];
18033 if (emit)
18034 output_asm_insn ("adr%?\t%0, %1", otherops);
18035 operands[1] = otherops[0];
18036 if (emit)
18037 {
18038 if (TARGET_LDRD)
18039 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18040 else
18041 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18042 }
18043
18044 if (count)
18045 *count = 2;
18046 break;
18047
18048 /* ??? This needs checking for thumb2. */
18049 default:
18050 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18051 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18052 {
18053 otherops[0] = operands[0];
18054 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18055 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18056
18057 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18058 {
18059 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18060 {
18061 switch ((int) INTVAL (otherops[2]))
18062 {
18063 case -8:
18064 if (emit)
18065 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18066 return "";
18067 case -4:
18068 if (TARGET_THUMB2)
18069 break;
18070 if (emit)
18071 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18072 return "";
18073 case 4:
18074 if (TARGET_THUMB2)
18075 break;
18076 if (emit)
18077 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18078 return "";
18079 }
18080 }
18081 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18082 operands[1] = otherops[0];
18083 if (TARGET_LDRD
18084 && (REG_P (otherops[2])
18085 || TARGET_THUMB2
18086 || (CONST_INT_P (otherops[2])
18087 && INTVAL (otherops[2]) > -256
18088 && INTVAL (otherops[2]) < 256)))
18089 {
18090 if (reg_overlap_mentioned_p (operands[0],
18091 otherops[2]))
18092 {
18093 /* Swap base and index registers over to
18094 avoid a conflict. */
18095 std::swap (otherops[1], otherops[2]);
18096 }
18097 /* If both registers conflict, it will usually
18098 have been fixed by a splitter. */
18099 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18100 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18101 {
18102 if (emit)
18103 {
18104 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18105 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18106 }
18107 if (count)
18108 *count = 2;
18109 }
18110 else
18111 {
18112 otherops[0] = operands[0];
18113 if (emit)
18114 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18115 }
18116 return "";
18117 }
18118
18119 if (CONST_INT_P (otherops[2]))
18120 {
18121 if (emit)
18122 {
18123 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18124 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18125 else
18126 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18127 }
18128 }
18129 else
18130 {
18131 if (emit)
18132 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18133 }
18134 }
18135 else
18136 {
18137 if (emit)
18138 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18139 }
18140
18141 if (count)
18142 *count = 2;
18143
18144 if (TARGET_LDRD)
18145 return "ldrd%?\t%0, [%1]";
18146
18147 return "ldmia%?\t%1, %M0";
18148 }
18149 else
18150 {
18151 otherops[1] = adjust_address (operands[1], SImode, 4);
18152 /* Take care of overlapping base/data reg. */
18153 if (reg_mentioned_p (operands[0], operands[1]))
18154 {
18155 if (emit)
18156 {
18157 output_asm_insn ("ldr%?\t%0, %1", otherops);
18158 output_asm_insn ("ldr%?\t%0, %1", operands);
18159 }
18160 if (count)
18161 *count = 2;
18162
18163 }
18164 else
18165 {
18166 if (emit)
18167 {
18168 output_asm_insn ("ldr%?\t%0, %1", operands);
18169 output_asm_insn ("ldr%?\t%0, %1", otherops);
18170 }
18171 if (count)
18172 *count = 2;
18173 }
18174 }
18175 }
18176 }
18177 else
18178 {
18179 /* Constraints should ensure this. */
18180 gcc_assert (code0 == MEM && code1 == REG);
18181 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18182 || (TARGET_ARM && TARGET_LDRD));
18183
18184 switch (GET_CODE (XEXP (operands[0], 0)))
18185 {
18186 case REG:
18187 if (emit)
18188 {
18189 if (TARGET_LDRD)
18190 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18191 else
18192 output_asm_insn ("stm%?\t%m0, %M1", operands);
18193 }
18194 break;
18195
18196 case PRE_INC:
18197 gcc_assert (TARGET_LDRD);
18198 if (emit)
18199 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18200 break;
18201
18202 case PRE_DEC:
18203 if (emit)
18204 {
18205 if (TARGET_LDRD)
18206 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18207 else
18208 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18209 }
18210 break;
18211
18212 case POST_INC:
18213 if (emit)
18214 {
18215 if (TARGET_LDRD)
18216 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18217 else
18218 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18219 }
18220 break;
18221
18222 case POST_DEC:
18223 gcc_assert (TARGET_LDRD);
18224 if (emit)
18225 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18226 break;
18227
18228 case PRE_MODIFY:
18229 case POST_MODIFY:
18230 otherops[0] = operands[1];
18231 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18232 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18233
18234 /* IWMMXT allows offsets larger than strd can handle;
18235 fix these up with a pair of str. */
18236 if (!TARGET_THUMB2
18237 && CONST_INT_P (otherops[2])
18238 && (INTVAL(otherops[2]) <= -256
18239 || INTVAL(otherops[2]) >= 256))
18240 {
18241 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18242 {
18243 if (emit)
18244 {
18245 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18246 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18247 }
18248 if (count)
18249 *count = 2;
18250 }
18251 else
18252 {
18253 if (emit)
18254 {
18255 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18256 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18257 }
18258 if (count)
18259 *count = 2;
18260 }
18261 }
18262 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18263 {
18264 if (emit)
18265 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18266 }
18267 else
18268 {
18269 if (emit)
18270 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18271 }
18272 break;
18273
18274 case PLUS:
18275 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18276 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18277 {
18278 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18279 {
18280 case -8:
18281 if (emit)
18282 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18283 return "";
18284
18285 case -4:
18286 if (TARGET_THUMB2)
18287 break;
18288 if (emit)
18289 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18290 return "";
18291
18292 case 4:
18293 if (TARGET_THUMB2)
18294 break;
18295 if (emit)
18296 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18297 return "";
18298 }
18299 }
18300 if (TARGET_LDRD
18301 && (REG_P (otherops[2])
18302 || TARGET_THUMB2
18303 || (CONST_INT_P (otherops[2])
18304 && INTVAL (otherops[2]) > -256
18305 && INTVAL (otherops[2]) < 256)))
18306 {
18307 otherops[0] = operands[1];
18308 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18309 if (emit)
18310 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18311 return "";
18312 }
18313 /* Fall through */
18314
18315 default:
18316 otherops[0] = adjust_address (operands[0], SImode, 4);
18317 otherops[1] = operands[1];
18318 if (emit)
18319 {
18320 output_asm_insn ("str%?\t%1, %0", operands);
18321 output_asm_insn ("str%?\t%H1, %0", otherops);
18322 }
18323 if (count)
18324 *count = 2;
18325 }
18326 }
18327
18328 return "";
18329 }
18330
18331 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18332 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18333
18334 const char *
18335 output_move_quad (rtx *operands)
18336 {
18337 if (REG_P (operands[0]))
18338 {
18339 /* Load, or reg->reg move. */
18340
18341 if (MEM_P (operands[1]))
18342 {
18343 switch (GET_CODE (XEXP (operands[1], 0)))
18344 {
18345 case REG:
18346 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18347 break;
18348
18349 case LABEL_REF:
18350 case CONST:
18351 output_asm_insn ("adr%?\t%0, %1", operands);
18352 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18353 break;
18354
18355 default:
18356 gcc_unreachable ();
18357 }
18358 }
18359 else
18360 {
18361 rtx ops[2];
18362 int dest, src, i;
18363
18364 gcc_assert (REG_P (operands[1]));
18365
18366 dest = REGNO (operands[0]);
18367 src = REGNO (operands[1]);
18368
18369 /* This seems pretty dumb, but hopefully GCC won't try to do it
18370 very often. */
18371 if (dest < src)
18372 for (i = 0; i < 4; i++)
18373 {
18374 ops[0] = gen_rtx_REG (SImode, dest + i);
18375 ops[1] = gen_rtx_REG (SImode, src + i);
18376 output_asm_insn ("mov%?\t%0, %1", ops);
18377 }
18378 else
18379 for (i = 3; i >= 0; i--)
18380 {
18381 ops[0] = gen_rtx_REG (SImode, dest + i);
18382 ops[1] = gen_rtx_REG (SImode, src + i);
18383 output_asm_insn ("mov%?\t%0, %1", ops);
18384 }
18385 }
18386 }
18387 else
18388 {
18389 gcc_assert (MEM_P (operands[0]));
18390 gcc_assert (REG_P (operands[1]));
18391 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18392
18393 switch (GET_CODE (XEXP (operands[0], 0)))
18394 {
18395 case REG:
18396 output_asm_insn ("stm%?\t%m0, %M1", operands);
18397 break;
18398
18399 default:
18400 gcc_unreachable ();
18401 }
18402 }
18403
18404 return "";
18405 }
18406
18407 /* Output a VFP load or store instruction. */
18408
18409 const char *
18410 output_move_vfp (rtx *operands)
18411 {
18412 rtx reg, mem, addr, ops[2];
18413 int load = REG_P (operands[0]);
18414 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18415 int sp = (!TARGET_VFP_FP16INST
18416 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18417 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18418 const char *templ;
18419 char buff[50];
18420 machine_mode mode;
18421
18422 reg = operands[!load];
18423 mem = operands[load];
18424
18425 mode = GET_MODE (reg);
18426
18427 gcc_assert (REG_P (reg));
18428 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18429 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18430 || mode == SFmode
18431 || mode == DFmode
18432 || mode == HImode
18433 || mode == SImode
18434 || mode == DImode
18435 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18436 gcc_assert (MEM_P (mem));
18437
18438 addr = XEXP (mem, 0);
18439
18440 switch (GET_CODE (addr))
18441 {
18442 case PRE_DEC:
18443 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18444 ops[0] = XEXP (addr, 0);
18445 ops[1] = reg;
18446 break;
18447
18448 case POST_INC:
18449 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18450 ops[0] = XEXP (addr, 0);
18451 ops[1] = reg;
18452 break;
18453
18454 default:
18455 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18456 ops[0] = reg;
18457 ops[1] = mem;
18458 break;
18459 }
18460
18461 sprintf (buff, templ,
18462 load ? "ld" : "st",
18463 dp ? "64" : sp ? "32" : "16",
18464 dp ? "P" : "",
18465 integer_p ? "\t%@ int" : "");
18466 output_asm_insn (buff, ops);
18467
18468 return "";
18469 }
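/* Hedged example of how the template expands (register and address are
   hypothetical): a DFmode load from a plain register address becomes

       vldr.64  d7, [r0]

   while a POST_INC store of an SFmode value becomes something like

       vstmia.32  r0!, {s15}
*/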
18470
18471 /* Output a Neon double-word or quad-word load or store, or a load
18472 or store for larger structure modes.
18473
18474 WARNING: The ordering of elements is weird in big-endian mode,
18475 because the EABI requires that vectors stored in memory appear
18476 as though they were stored by a VSTM instruction.
18477 GCC RTL defines element ordering based on in-memory order.
18478 This can be different from the architectural ordering of elements
18479 within a NEON register. The intrinsics defined in arm_neon.h use the
18480 NEON register element ordering, not the GCC RTL element ordering.
18481
18482 For example, the in-memory ordering of a big-endian quadword
18483 vector with 16-bit elements when stored from register pair {d0,d1}
18484 will be (lowest address first, d0[N] is NEON register element N):
18485
18486 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18487
18488 When necessary, quadword registers (dN, dN+1) are moved to ARM
18489 registers from rN in the order:
18490
18491 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18492
18493 So that STM/LDM can be used on vectors in ARM registers, and the
18494 same memory layout will result as if VSTM/VLDM were used.
18495
18496 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18497 possible, which allows use of appropriate alignment tags.
18498 Note that the choice of "64" is independent of the actual vector
18499 element size; this size simply ensures that the behavior is
18500 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18501
18502 Due to limitations of those instructions, use of VST1.64/VLD1.64
18503 is not possible if:
18504 - the address contains PRE_DEC, or
18505 - the mode refers to more than 4 double-word registers
18506
18507 In those cases, it would be possible to replace VSTM/VLDM by a
18508 sequence of instructions; this is not currently implemented since
18509 this is not certain to actually improve performance. */
18510
18511 const char *
18512 output_move_neon (rtx *operands)
18513 {
18514 rtx reg, mem, addr, ops[2];
18515 int regno, nregs, load = REG_P (operands[0]);
18516 const char *templ;
18517 char buff[50];
18518 machine_mode mode;
18519
18520 reg = operands[!load];
18521 mem = operands[load];
18522
18523 mode = GET_MODE (reg);
18524
18525 gcc_assert (REG_P (reg));
18526 regno = REGNO (reg);
18527 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18528 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18529 || NEON_REGNO_OK_FOR_QUAD (regno));
18530 gcc_assert (VALID_NEON_DREG_MODE (mode)
18531 || VALID_NEON_QREG_MODE (mode)
18532 || VALID_NEON_STRUCT_MODE (mode));
18533 gcc_assert (MEM_P (mem));
18534
18535 addr = XEXP (mem, 0);
18536
18537 /* Strip off const from addresses like (const (plus (...))). */
18538 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18539 addr = XEXP (addr, 0);
18540
18541 switch (GET_CODE (addr))
18542 {
18543 case POST_INC:
18544 /* We have to use vldm / vstm for too-large modes. */
18545 if (nregs > 4)
18546 {
18547 templ = "v%smia%%?\t%%0!, %%h1";
18548 ops[0] = XEXP (addr, 0);
18549 }
18550 else
18551 {
18552 templ = "v%s1.64\t%%h1, %%A0";
18553 ops[0] = mem;
18554 }
18555 ops[1] = reg;
18556 break;
18557
18558 case PRE_DEC:
18559 /* We have to use vldm / vstm in this case, since there is no
18560 pre-decrement form of the vld1 / vst1 instructions. */
18561 templ = "v%smdb%%?\t%%0!, %%h1";
18562 ops[0] = XEXP (addr, 0);
18563 ops[1] = reg;
18564 break;
18565
18566 case POST_MODIFY:
18567 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18568 gcc_unreachable ();
18569
18570 case REG:
18571 /* We have to use vldm / vstm for too-large modes. */
18572 if (nregs > 1)
18573 {
18574 if (nregs > 4)
18575 templ = "v%smia%%?\t%%m0, %%h1";
18576 else
18577 templ = "v%s1.64\t%%h1, %%A0";
18578
18579 ops[0] = mem;
18580 ops[1] = reg;
18581 break;
18582 }
18583 /* Fall through. */
18584 case LABEL_REF:
18585 case PLUS:
18586 {
18587 int i;
18588 int overlap = -1;
18589 for (i = 0; i < nregs; i++)
18590 {
18591 /* We're only using DImode here because it's a convenient size. */
18592 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18593 ops[1] = adjust_address (mem, DImode, 8 * i);
18594 if (reg_overlap_mentioned_p (ops[0], mem))
18595 {
18596 gcc_assert (overlap == -1);
18597 overlap = i;
18598 }
18599 else
18600 {
18601 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18602 output_asm_insn (buff, ops);
18603 }
18604 }
18605 if (overlap != -1)
18606 {
18607 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18608 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18609 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18610 output_asm_insn (buff, ops);
18611 }
18612
18613 return "";
18614 }
18615
18616 default:
18617 gcc_unreachable ();
18618 }
18619
18620 sprintf (buff, templ, load ? "ld" : "st");
18621 output_asm_insn (buff, ops);
18622
18623 return "";
18624 }
18625
18626 /* Compute and return the length of neon_mov<mode>, where <mode> is
18627 one of VSTRUCT modes: EI, OI, CI or XI. */
18628 int
18629 arm_attr_length_move_neon (rtx_insn *insn)
18630 {
18631 rtx reg, mem, addr;
18632 int load;
18633 machine_mode mode;
18634
18635 extract_insn_cached (insn);
18636
18637 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18638 {
18639 mode = GET_MODE (recog_data.operand[0]);
18640 switch (mode)
18641 {
18642 case EImode:
18643 case OImode:
18644 return 8;
18645 case CImode:
18646 return 12;
18647 case XImode:
18648 return 16;
18649 default:
18650 gcc_unreachable ();
18651 }
18652 }
18653
18654 load = REG_P (recog_data.operand[0]);
18655 reg = recog_data.operand[!load];
18656 mem = recog_data.operand[load];
18657
18658 gcc_assert (MEM_P (mem));
18659
18660 mode = GET_MODE (reg);
18661 addr = XEXP (mem, 0);
18662
18663 /* Strip off const from addresses like (const (plus (...))). */
18664 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18665 addr = XEXP (addr, 0);
18666
18667 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18668 {
18669 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18670 return insns * 4;
18671 }
18672 else
18673 return 4;
18674 }
18675
18676 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18677 return zero. */
18678
18679 int
18680 arm_address_offset_is_imm (rtx_insn *insn)
18681 {
18682 rtx mem, addr;
18683
18684 extract_insn_cached (insn);
18685
18686 if (REG_P (recog_data.operand[0]))
18687 return 0;
18688
18689 mem = recog_data.operand[0];
18690
18691 gcc_assert (MEM_P (mem));
18692
18693 addr = XEXP (mem, 0);
18694
18695 if (REG_P (addr)
18696 || (GET_CODE (addr) == PLUS
18697 && REG_P (XEXP (addr, 0))
18698 && CONST_INT_P (XEXP (addr, 1))))
18699 return 1;
18700 else
18701 return 0;
18702 }
18703
18704 /* Output an ADD r, s, #n where n may be too big for one instruction.
18705 If adding zero to one register, output nothing. */
18706 const char *
18707 output_add_immediate (rtx *operands)
18708 {
18709 HOST_WIDE_INT n = INTVAL (operands[2]);
18710
18711 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18712 {
18713 if (n < 0)
18714 output_multi_immediate (operands,
18715 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18716 -n);
18717 else
18718 output_multi_immediate (operands,
18719 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18720 n);
18721 }
18722
18723 return "";
18724 }
18725
18726 /* Output a multiple immediate operation.
18727 OPERANDS is the vector of operands referred to in the output patterns.
18728 INSTR1 is the output pattern to use for the first constant.
18729 INSTR2 is the output pattern to use for subsequent constants.
18730 IMMED_OP is the index of the constant slot in OPERANDS.
18731 N is the constant value. */
18732 static const char *
18733 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18734 int immed_op, HOST_WIDE_INT n)
18735 {
18736 #if HOST_BITS_PER_WIDE_INT > 32
18737 n &= 0xffffffff;
18738 #endif
18739
18740 if (n == 0)
18741 {
18742 /* Quick and easy output. */
18743 operands[immed_op] = const0_rtx;
18744 output_asm_insn (instr1, operands);
18745 }
18746 else
18747 {
18748 int i;
18749 const char * instr = instr1;
18750
18751 /* Note that n is never zero here (which would give no output). */
18752 for (i = 0; i < 32; i += 2)
18753 {
18754 if (n & (3 << i))
18755 {
18756 operands[immed_op] = GEN_INT (n & (255 << i));
18757 output_asm_insn (instr, operands);
18758 instr = instr2;
18759 i += 6;
18760 }
18761 }
18762 }
18763
18764 return "";
18765 }
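/* Hedged illustration (register names are hypothetical): for N = 0x10203 the
   loop above picks 8-bit chunks anchored at even bit positions, so an add
   expands to roughly

       add r0, r1, #3
       add r0, r0, #512        @ 0x200
       add r0, r0, #65536      @ 0x10000
*/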
18766
18767 /* Return the name of a shifter operation. */
18768 static const char *
18769 arm_shift_nmem(enum rtx_code code)
18770 {
18771 switch (code)
18772 {
18773 case ASHIFT:
18774 return ARM_LSL_NAME;
18775
18776 case ASHIFTRT:
18777 return "asr";
18778
18779 case LSHIFTRT:
18780 return "lsr";
18781
18782 case ROTATERT:
18783 return "ror";
18784
18785 default:
18786 abort();
18787 }
18788 }
18789
18790 /* Return the appropriate ARM instruction for the operation code.
18791 The returned result should not be overwritten. OP is the rtx of the
18792 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18793 was shifted. */
18794 const char *
18795 arithmetic_instr (rtx op, int shift_first_arg)
18796 {
18797 switch (GET_CODE (op))
18798 {
18799 case PLUS:
18800 return "add";
18801
18802 case MINUS:
18803 return shift_first_arg ? "rsb" : "sub";
18804
18805 case IOR:
18806 return "orr";
18807
18808 case XOR:
18809 return "eor";
18810
18811 case AND:
18812 return "and";
18813
18814 case ASHIFT:
18815 case ASHIFTRT:
18816 case LSHIFTRT:
18817 case ROTATERT:
18818 return arm_shift_nmem(GET_CODE(op));
18819
18820 default:
18821 gcc_unreachable ();
18822 }
18823 }
18824
18825 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18826 for the operation code. The returned result should not be overwritten.
18827 OP is the rtx code of the shift.
18828 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18829 amount of a constant shift. */
18830 static const char *
18831 shift_op (rtx op, HOST_WIDE_INT *amountp)
18832 {
18833 const char * mnem;
18834 enum rtx_code code = GET_CODE (op);
18835
18836 switch (code)
18837 {
18838 case ROTATE:
18839 if (!CONST_INT_P (XEXP (op, 1)))
18840 {
18841 output_operand_lossage ("invalid shift operand");
18842 return NULL;
18843 }
18844
18845 code = ROTATERT;
18846 *amountp = 32 - INTVAL (XEXP (op, 1));
18847 mnem = "ror";
18848 break;
18849
18850 case ASHIFT:
18851 case ASHIFTRT:
18852 case LSHIFTRT:
18853 case ROTATERT:
18854 mnem = arm_shift_nmem(code);
18855 if (CONST_INT_P (XEXP (op, 1)))
18856 {
18857 *amountp = INTVAL (XEXP (op, 1));
18858 }
18859 else if (REG_P (XEXP (op, 1)))
18860 {
18861 *amountp = -1;
18862 return mnem;
18863 }
18864 else
18865 {
18866 output_operand_lossage ("invalid shift operand");
18867 return NULL;
18868 }
18869 break;
18870
18871 case MULT:
18872 /* We never have to worry about the amount being other than a
18873 power of 2, since this case can never be reloaded from a reg. */
18874 if (!CONST_INT_P (XEXP (op, 1)))
18875 {
18876 output_operand_lossage ("invalid shift operand");
18877 return NULL;
18878 }
18879
18880 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18881
18882 /* Amount must be a power of two. */
18883 if (*amountp & (*amountp - 1))
18884 {
18885 output_operand_lossage ("invalid shift operand");
18886 return NULL;
18887 }
18888
18889 *amountp = exact_log2 (*amountp);
18890 gcc_assert (IN_RANGE (*amountp, 0, 31));
18891 return ARM_LSL_NAME;
18892
18893 default:
18894 output_operand_lossage ("invalid shift operand");
18895 return NULL;
18896 }
18897
18898 /* This is not 100% correct, but follows from the desire to merge
18899 multiplication by a power of 2 with the recognizer for a
18900 shift. >=32 is not a valid shift for "lsl", so we must try and
18901 output a shift that produces the correct arithmetical result.
18902 Using lsr #32 is identical except for the fact that the carry bit
18903 is not set correctly if we set the flags; but we never use the
18904 carry bit from such an operation, so we can ignore that. */
18905 if (code == ROTATERT)
18906 /* Rotate is just modulo 32. */
18907 *amountp &= 31;
18908 else if (*amountp != (*amountp & 31))
18909 {
18910 if (code == ASHIFT)
18911 mnem = "lsr";
18912 *amountp = 32;
18913 }
18914
18915 /* Shifts of 0 are no-ops. */
18916 if (*amountp == 0)
18917 return NULL;
18918
18919 return mnem;
18920 }
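/* Illustrative note, not part of the original source: a MULT by 8 reaching
   the routine above is printed as "lsl #3", while an ASHIFT whose constant
   amount is 32 or more falls through to the fixup at the end and is printed
   as "lsr #32", which gives the same arithmetic result of zero without
   relying on the carry flag.  */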
18921
18922 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18923 because /bin/as is horribly restrictive. The judgement about
18924 whether or not each character is 'printable' (and can be output as
18925 is) or not (and must be printed with an octal escape) must be made
18926 with reference to the *host* character set -- the situation is
18927 similar to that discussed in the comments above pp_c_char in
18928 c-pretty-print.c. */
18929
18930 #define MAX_ASCII_LEN 51
18931
18932 void
18933 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18934 {
18935 int i;
18936 int len_so_far = 0;
18937
18938 fputs ("\t.ascii\t\"", stream);
18939
18940 for (i = 0; i < len; i++)
18941 {
18942 int c = p[i];
18943
18944 if (len_so_far >= MAX_ASCII_LEN)
18945 {
18946 fputs ("\"\n\t.ascii\t\"", stream);
18947 len_so_far = 0;
18948 }
18949
18950 if (ISPRINT (c))
18951 {
18952 if (c == '\\' || c == '\"')
18953 {
18954 putc ('\\', stream);
18955 len_so_far++;
18956 }
18957 putc (c, stream);
18958 len_so_far++;
18959 }
18960 else
18961 {
18962 fprintf (stream, "\\%03o", c);
18963 len_so_far += 4;
18964 }
18965 }
18966
18967 fputs ("\"\n", stream);
18968 }
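/* Hedged example: the string "a\"b" followed by a newline would be emitted as

       .ascii  "a\"b\012"

   with the quote escaped by a backslash and the non-printable newline
   rendered as a three-digit octal escape.  */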
18969 \f
18970 /* Whether a register is callee saved or not. This is necessary because high
18971 registers are marked as caller saved when optimizing for size on Thumb-1
18972 targets, despite actually being callee saved, in order to avoid using them. */
18973 #define callee_saved_reg_p(reg) \
18974 (!call_used_regs[reg] \
18975 || (TARGET_THUMB1 && optimize_size \
18976 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18977
18978 /* Compute the register save mask for registers 0 through 12
18979 inclusive. This code is used by arm_compute_save_reg_mask. */
18980
18981 static unsigned long
18982 arm_compute_save_reg0_reg12_mask (void)
18983 {
18984 unsigned long func_type = arm_current_func_type ();
18985 unsigned long save_reg_mask = 0;
18986 unsigned int reg;
18987
18988 if (IS_INTERRUPT (func_type))
18989 {
18990 unsigned int max_reg;
18991 /* Interrupt functions must not corrupt any registers,
18992 even call clobbered ones. If this is a leaf function
18993 we can just examine the registers used by the RTL, but
18994 otherwise we have to assume that whatever function is
18995 called might clobber anything, and so we have to save
18996 all the call-clobbered registers as well. */
18997 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18998 /* FIQ handlers have registers r8 - r12 banked, so
18999 we only need to check r0 - r7. Normal ISRs only
19000 bank r14 and r15, so we must check up to r12.
19001 r13 is the stack pointer, which is always preserved,
19002 so we do not need to consider it here. */
19003 max_reg = 7;
19004 else
19005 max_reg = 12;
19006
19007 for (reg = 0; reg <= max_reg; reg++)
19008 if (df_regs_ever_live_p (reg)
19009 || (! crtl->is_leaf && call_used_regs[reg]))
19010 save_reg_mask |= (1 << reg);
19011
19012 /* Also save the pic base register if necessary. */
19013 if (flag_pic
19014 && !TARGET_SINGLE_PIC_BASE
19015 && arm_pic_register != INVALID_REGNUM
19016 && crtl->uses_pic_offset_table)
19017 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19018 }
19019 else if (IS_VOLATILE(func_type))
19020 {
19021 /* For noreturn functions we historically omitted register saves
19022 altogether. However, this really messes up debugging. As a
19023 compromise, save just the frame pointers. Combined with the link
19024 register saved elsewhere this should be sufficient to get
19025 a backtrace. */
19026 if (frame_pointer_needed)
19027 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19028 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19029 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19030 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19031 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19032 }
19033 else
19034 {
19035 /* In the normal case we only need to save those registers
19036 which are call saved and which are used by this function. */
19037 for (reg = 0; reg <= 11; reg++)
19038 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19039 save_reg_mask |= (1 << reg);
19040
19041 /* Handle the frame pointer as a special case. */
19042 if (frame_pointer_needed)
19043 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19044
19045 /* If we aren't loading the PIC register,
19046 don't stack it even though it may be live. */
19047 if (flag_pic
19048 && !TARGET_SINGLE_PIC_BASE
19049 && arm_pic_register != INVALID_REGNUM
19050 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19051 || crtl->uses_pic_offset_table))
19052 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19053
19054 /* The prologue will copy SP into R0, so save it. */
19055 if (IS_STACKALIGN (func_type))
19056 save_reg_mask |= 1;
19057 }
19058
19059 /* Save registers so the exception handler can modify them. */
19060 if (crtl->calls_eh_return)
19061 {
19062 unsigned int i;
19063
19064 for (i = 0; ; i++)
19065 {
19066 reg = EH_RETURN_DATA_REGNO (i);
19067 if (reg == INVALID_REGNUM)
19068 break;
19069 save_reg_mask |= 1 << reg;
19070 }
19071 }
19072
19073 return save_reg_mask;
19074 }
19075
19076 /* Return true if r3 is live at the start of the function. */
19077
19078 static bool
19079 arm_r3_live_at_start_p (void)
19080 {
19081 /* Just look at cfg info, which is still close enough to correct at this
19082 point. This gives false positives for broken functions that might use
19083 uninitialized data that happens to be allocated in r3, but who cares? */
19084 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19085 }
19086
19087 /* Compute the number of bytes used to store the static chain register on the
19088 stack, above the stack frame. We need to know this accurately to get the
19089 alignment of the rest of the stack frame correct. */
19090
19091 static int
19092 arm_compute_static_chain_stack_bytes (void)
19093 {
19094 /* See the defining assertion in arm_expand_prologue. */
19095 if (IS_NESTED (arm_current_func_type ())
19096 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19097 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19098 && !df_regs_ever_live_p (LR_REGNUM)))
19099 && arm_r3_live_at_start_p ()
19100 && crtl->args.pretend_args_size == 0)
19101 return 4;
19102
19103 return 0;
19104 }
19105
19106 /* Compute a bit mask of which registers need to be
19107 saved on the stack for the current function.
19108 This is used by arm_get_frame_offsets, which may add extra registers. */
19109
19110 static unsigned long
19111 arm_compute_save_reg_mask (void)
19112 {
19113 unsigned int save_reg_mask = 0;
19114 unsigned long func_type = arm_current_func_type ();
19115 unsigned int reg;
19116
19117 if (IS_NAKED (func_type))
19118 /* This should never really happen. */
19119 return 0;
19120
19121 /* If we are creating a stack frame, then we must save the frame pointer,
19122 IP (which will hold the old stack pointer), LR and the PC. */
19123 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19124 save_reg_mask |=
19125 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19126 | (1 << IP_REGNUM)
19127 | (1 << LR_REGNUM)
19128 | (1 << PC_REGNUM);
19129
19130 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19131
19132 /* Decide if we need to save the link register.
19133 Interrupt routines have their own banked link register,
19134 so they never need to save it.
19135 Otherwise if we do not use the link register we do not need to save
19136 it. If we are pushing other registers onto the stack however, we
19137 can save an instruction in the epilogue by pushing the link register
19138 now and then popping it back into the PC. This incurs extra memory
19139 accesses though, so we only do it when optimizing for size, and only
19140 if we know that we will not need a fancy return sequence. */
19141 if (df_regs_ever_live_p (LR_REGNUM)
19142 || (save_reg_mask
19143 && optimize_size
19144 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19145 && !crtl->tail_call_emit
19146 && !crtl->calls_eh_return))
19147 save_reg_mask |= 1 << LR_REGNUM;
19148
19149 if (cfun->machine->lr_save_eliminated)
19150 save_reg_mask &= ~ (1 << LR_REGNUM);
19151
19152 if (TARGET_REALLY_IWMMXT
19153 && ((bit_count (save_reg_mask)
19154 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19155 arm_compute_static_chain_stack_bytes())
19156 ) % 2) != 0)
19157 {
19158 /* The total number of registers that are going to be pushed
19159 onto the stack is odd. We need to ensure that the stack
19160 is 64-bit aligned before we start to save iWMMXt registers,
19161 and also before we start to create locals. (A local variable
19162 might be a double or long long which we will load/store using
19163 an iWMMXt instruction). Therefore we need to push another
19164 ARM register, so that the stack will be 64-bit aligned. We
19165 try to avoid using the arg registers (r0 - r3) as they might be
19166 used to pass values in a tail call. */
19167 for (reg = 4; reg <= 12; reg++)
19168 if ((save_reg_mask & (1 << reg)) == 0)
19169 break;
19170
19171 if (reg <= 12)
19172 save_reg_mask |= (1 << reg);
19173 else
19174 {
19175 cfun->machine->sibcall_blocked = 1;
19176 save_reg_mask |= (1 << 3);
19177 }
19178 }
19179
19180 /* We may need to push an additional register for use initializing the
19181 PIC base register. */
19182 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19183 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19184 {
19185 reg = thumb_find_work_register (1 << 4);
19186 if (!call_used_regs[reg])
19187 save_reg_mask |= (1 << reg);
19188 }
19189
19190 return save_reg_mask;
19191 }
19192
19193 /* Compute a bit mask of which registers need to be
19194 saved on the stack for the current function. */
19195 static unsigned long
19196 thumb1_compute_save_reg_mask (void)
19197 {
19198 unsigned long mask;
19199 unsigned reg;
19200
19201 mask = 0;
19202 for (reg = 0; reg < 12; reg ++)
19203 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19204 mask |= 1 << reg;
19205
19206 /* Handle the frame pointer as a special case. */
19207 if (frame_pointer_needed)
19208 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19209
19210 if (flag_pic
19211 && !TARGET_SINGLE_PIC_BASE
19212 && arm_pic_register != INVALID_REGNUM
19213 && crtl->uses_pic_offset_table)
19214 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19215
19216 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19217 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19218 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19219
19220 /* LR will also be pushed if any lo regs are pushed. */
19221 if (mask & 0xff || thumb_force_lr_save ())
19222 mask |= (1 << LR_REGNUM);
19223
19224 /* Make sure we have a low work register if we need one.
19225 We will need one if we are going to push a high register,
19226 but we are not currently intending to push a low register. */
19227 if ((mask & 0xff) == 0
19228 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19229 {
19230 /* Use thumb_find_work_register to choose which register
19231 we will use. If the register is live then we will
19232 have to push it. Use LAST_LO_REGNUM as our fallback
19233 choice for the register to select. */
19234 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19235 /* Make sure the register returned by thumb_find_work_register is
19236 not part of the return value. */
19237 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19238 reg = LAST_LO_REGNUM;
19239
19240 if (callee_saved_reg_p (reg))
19241 mask |= 1 << reg;
19242 }
19243
19244 /* The 504 below is 8 bytes less than 512 because there are two possible
19245 alignment words. We can't tell here if they will be present or not so we
19246 have to play it safe and assume that they are. */
19247 if ((CALLER_INTERWORKING_SLOT_SIZE +
19248 ROUND_UP_WORD (get_frame_size ()) +
19249 crtl->outgoing_args_size) >= 504)
19250 {
19251 /* This is the same as the code in thumb1_expand_prologue() which
19252 determines which register to use for stack decrement. */
19253 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19254 if (mask & (1 << reg))
19255 break;
19256
19257 if (reg > LAST_LO_REGNUM)
19258 {
19259 /* Make sure we have a register available for stack decrement. */
19260 mask |= 1 << LAST_LO_REGNUM;
19261 }
19262 }
19263
19264 return mask;
19265 }
19266
19267
19268 /* Return the number of bytes required to save VFP registers. */
19269 static int
19270 arm_get_vfp_saved_size (void)
19271 {
19272 unsigned int regno;
19273 int count;
19274 int saved;
19275
19276 saved = 0;
19277 /* Space for saved VFP registers. */
19278 if (TARGET_HARD_FLOAT)
19279 {
19280 count = 0;
19281 for (regno = FIRST_VFP_REGNUM;
19282 regno < LAST_VFP_REGNUM;
19283 regno += 2)
19284 {
19285 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19286 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19287 {
19288 if (count > 0)
19289 {
19290 /* Workaround ARM10 VFPr1 bug. */
19291 if (count == 2 && !arm_arch6)
19292 count++;
19293 saved += count * 8;
19294 }
19295 count = 0;
19296 }
19297 else
19298 count++;
19299 }
19300 if (count > 0)
19301 {
19302 if (count == 2 && !arm_arch6)
19303 count++;
19304 saved += count * 8;
19305 }
19306 }
19307 return saved;
19308 }
19309
19310
19311 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19312 everything bar the final return instruction. If simple_return is true,
19313 then do not output the epilogue, because it has already been emitted in RTL. */
19314 const char *
19315 output_return_instruction (rtx operand, bool really_return, bool reverse,
19316 bool simple_return)
19317 {
19318 char conditional[10];
19319 char instr[100];
19320 unsigned reg;
19321 unsigned long live_regs_mask;
19322 unsigned long func_type;
19323 arm_stack_offsets *offsets;
19324
19325 func_type = arm_current_func_type ();
19326
19327 if (IS_NAKED (func_type))
19328 return "";
19329
19330 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19331 {
19332 /* If this function was declared non-returning, and we have
19333 found a tail call, then we have to trust that the called
19334 function won't return. */
19335 if (really_return)
19336 {
19337 rtx ops[2];
19338
19339 /* Otherwise, trap an attempted return by aborting. */
19340 ops[0] = operand;
19341 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19342 : "abort");
19343 assemble_external_libcall (ops[1]);
19344 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19345 }
19346
19347 return "";
19348 }
19349
19350 gcc_assert (!cfun->calls_alloca || really_return);
19351
19352 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19353
19354 cfun->machine->return_used_this_function = 1;
19355
19356 offsets = arm_get_frame_offsets ();
19357 live_regs_mask = offsets->saved_regs_mask;
19358
19359 if (!simple_return && live_regs_mask)
19360 {
19361 const char * return_reg;
19362
19363 /* If we do not have any special requirements for function exit
19364 (e.g. interworking) then we can load the return address
19365 directly into the PC. Otherwise we must load it into LR. */
19366 if (really_return
19367 && !IS_CMSE_ENTRY (func_type)
19368 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19369 return_reg = reg_names[PC_REGNUM];
19370 else
19371 return_reg = reg_names[LR_REGNUM];
19372
19373 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19374 {
19375 /* There are three possible reasons for the IP register
19376 being saved. 1) a stack frame was created, in which case
19377 IP contains the old stack pointer, or 2) an ISR routine
19378 corrupted it, or 3) it was saved to align the stack on
19379 iWMMXt. In case 1, restore IP into SP; otherwise just
19380 restore IP. */
19381 if (frame_pointer_needed)
19382 {
19383 live_regs_mask &= ~ (1 << IP_REGNUM);
19384 live_regs_mask |= (1 << SP_REGNUM);
19385 }
19386 else
19387 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19388 }
19389
19390 /* On some ARM architectures it is faster to use LDR rather than
19391 LDM to load a single register. On other architectures, the
19392 cost is the same. In 26 bit mode, or for exception handlers,
19393 we have to use LDM to load the PC so that the CPSR is also
19394 restored. */
19395 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19396 if (live_regs_mask == (1U << reg))
19397 break;
19398
19399 if (reg <= LAST_ARM_REGNUM
19400 && (reg != LR_REGNUM
19401 || ! really_return
19402 || ! IS_INTERRUPT (func_type)))
19403 {
19404 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19405 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19406 }
19407 else
19408 {
19409 char *p;
19410 int first = 1;
19411
19412 /* Generate the load multiple instruction to restore the
19413 registers. Note we can get here, even if
19414 frame_pointer_needed is true, but only if sp already
19415 points to the base of the saved core registers. */
19416 if (live_regs_mask & (1 << SP_REGNUM))
19417 {
19418 unsigned HOST_WIDE_INT stack_adjust;
19419
19420 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19421 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19422
19423 if (stack_adjust && arm_arch5 && TARGET_ARM)
19424 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19425 else
19426 {
19427 /* If we can't use ldmib (SA110 bug),
19428 then try to pop r3 instead. */
19429 if (stack_adjust)
19430 live_regs_mask |= 1 << 3;
19431
19432 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19433 }
19434 }
19435 /* For interrupt returns we have to use an LDM rather than
19436 a POP so that we can use the exception return variant. */
19437 else if (IS_INTERRUPT (func_type))
19438 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19439 else
19440 sprintf (instr, "pop%s\t{", conditional);
19441
19442 p = instr + strlen (instr);
19443
19444 for (reg = 0; reg <= SP_REGNUM; reg++)
19445 if (live_regs_mask & (1 << reg))
19446 {
19447 int l = strlen (reg_names[reg]);
19448
19449 if (first)
19450 first = 0;
19451 else
19452 {
19453 memcpy (p, ", ", 2);
19454 p += 2;
19455 }
19456
19457 memcpy (p, "%|", 2);
19458 memcpy (p + 2, reg_names[reg], l);
19459 p += l + 2;
19460 }
19461
19462 if (live_regs_mask & (1 << LR_REGNUM))
19463 {
19464 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19465 /* If returning from an interrupt, restore the CPSR. */
19466 if (IS_INTERRUPT (func_type))
19467 strcat (p, "^");
19468 }
19469 else
19470 strcpy (p, "}");
19471 }
19472
19473 output_asm_insn (instr, & operand);
19474
19475 /* See if we need to generate an extra instruction to
19476 perform the actual function return. */
19477 if (really_return
19478 && func_type != ARM_FT_INTERWORKED
19479 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19480 {
19481 /* The return has already been handled
19482 by loading the LR into the PC. */
19483 return "";
19484 }
19485 }
19486
19487 if (really_return)
19488 {
19489 switch ((int) ARM_FUNC_TYPE (func_type))
19490 {
19491 case ARM_FT_ISR:
19492 case ARM_FT_FIQ:
19493 /* ??? This is wrong for unified assembly syntax. */
19494 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19495 break;
19496
19497 case ARM_FT_INTERWORKED:
19498 gcc_assert (arm_arch5 || arm_arch4t);
19499 sprintf (instr, "bx%s\t%%|lr", conditional);
19500 break;
19501
19502 case ARM_FT_EXCEPTION:
19503 /* ??? This is wrong for unified assembly syntax. */
19504 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19505 break;
19506
19507 default:
19508 if (IS_CMSE_ENTRY (func_type))
19509 {
19510 /* Check if we have to clear the 'GE bits', which are only used if
19511 parallel add and subtraction instructions are available. */
19512 if (TARGET_INT_SIMD)
19513 snprintf (instr, sizeof (instr),
19514 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19515 else
19516 snprintf (instr, sizeof (instr),
19517 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19518
19519 output_asm_insn (instr, & operand);
19520 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19521 {
19522 /* Clear the cumulative exception-status bits (0-4,7) and the
19523 condition code bits (28-31) of the FPSCR. We need to
19524 remember to clear the first scratch register used (IP) and
19525 save and restore the second (r4). */
19526 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19527 output_asm_insn (instr, & operand);
19528 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19529 output_asm_insn (instr, & operand);
19530 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19531 output_asm_insn (instr, & operand);
19532 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19533 output_asm_insn (instr, & operand);
19534 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19535 output_asm_insn (instr, & operand);
19536 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19537 output_asm_insn (instr, & operand);
19538 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19539 output_asm_insn (instr, & operand);
19540 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19541 output_asm_insn (instr, & operand);
19542 }
19543 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19544 }
19545 /* Use bx if it's available. */
19546 else if (arm_arch5 || arm_arch4t)
19547 sprintf (instr, "bx%s\t%%|lr", conditional);
19548 else
19549 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19550 break;
19551 }
19552
19553 output_asm_insn (instr, & operand);
19554 }
19555
19556 return "";
19557 }
19558
19559 /* Output in FILE asm statements needed to declare the NAME of the function
19560 defined by its DECL node. */
19561
19562 void
19563 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19564 {
19565 size_t cmse_name_len;
19566 char *cmse_name = 0;
19567 char cmse_prefix[] = "__acle_se_";
19568
19569 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19570 extra function label for each function with the 'cmse_nonsecure_entry'
19571 attribute. This extra function label should be prepended with
19572 '__acle_se_', telling the linker that it needs to create secure gateway
19573 veneers for this function. */
19574 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19575 DECL_ATTRIBUTES (decl)))
19576 {
19577 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19578 cmse_name = XALLOCAVEC (char, cmse_name_len);
19579 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19580 targetm.asm_out.globalize_label (file, cmse_name);
19581
19582 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19583 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19584 }
19585
19586 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19587 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19588 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19589 ASM_OUTPUT_LABEL (file, name);
19590
19591 if (cmse_name)
19592 ASM_OUTPUT_LABEL (file, cmse_name);
19593
19594 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19595 }
19596
19597 /* Write the function name into the code section, directly preceding
19598 the function prologue.
19599
19600 Code will be output similar to this:
19601 t0
19602 .ascii "arm_poke_function_name", 0
19603 .align
19604 t1
19605 .word 0xff000000 + (t1 - t0)
19606 arm_poke_function_name
19607 mov ip, sp
19608 stmfd sp!, {fp, ip, lr, pc}
19609 sub fp, ip, #4
19610
19611 When performing a stack backtrace, code can inspect the value
19612 of 'pc' stored at 'fp' + 0. If the trace function then looks
19613 at location pc - 12 and the top 8 bits are set, then we know
19614 that there is a function name embedded immediately preceding this
19615 location, and that its length is ((pc[-3]) & ~0xff000000).
19616
19617 We assume that pc is declared as a pointer to an unsigned long.
19618
19619 It is of no benefit to output the function name if we are assembling
19620 a leaf function. These function types will not contain a stack
19621 backtrace structure; therefore it is not possible to determine the
19622 function name. */
19623 void
19624 arm_poke_function_name (FILE *stream, const char *name)
19625 {
19626 unsigned long alignlength;
19627 unsigned long length;
19628 rtx x;
19629
19630 length = strlen (name) + 1;
19631 alignlength = ROUND_UP_WORD (length);
19632
19633 ASM_OUTPUT_ASCII (stream, name, length);
19634 ASM_OUTPUT_ALIGN (stream, 2);
19635 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19636 assemble_aligned_integer (UNITS_PER_WORD, x);
19637 }
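/* Minimal sketch of a consumer, not part of GCC (MARKER is a hypothetical
   `const unsigned long *` pointing to the 0xff000000-tagged word emitted
   above):

       unsigned long word = *marker;
       const char *name = NULL;
       if ((word & 0xff000000) == 0xff000000)
         name = (const char *) marker - (word & ~0xff000000UL);
*/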
19638
19639 /* Place some comments into the assembler stream
19640 describing the current function. */
19641 static void
19642 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19643 {
19644 unsigned long func_type;
19645
19646 /* Sanity check. */
19647 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19648
19649 func_type = arm_current_func_type ();
19650
19651 switch ((int) ARM_FUNC_TYPE (func_type))
19652 {
19653 default:
19654 case ARM_FT_NORMAL:
19655 break;
19656 case ARM_FT_INTERWORKED:
19657 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19658 break;
19659 case ARM_FT_ISR:
19660 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19661 break;
19662 case ARM_FT_FIQ:
19663 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19664 break;
19665 case ARM_FT_EXCEPTION:
19666 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19667 break;
19668 }
19669
19670 if (IS_NAKED (func_type))
19671 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19672
19673 if (IS_VOLATILE (func_type))
19674 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19675
19676 if (IS_NESTED (func_type))
19677 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19678 if (IS_STACKALIGN (func_type))
19679 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19680 if (IS_CMSE_ENTRY (func_type))
19681 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19682
19683 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19684 crtl->args.size,
19685 crtl->args.pretend_args_size, frame_size);
19686
19687 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19688 frame_pointer_needed,
19689 cfun->machine->uses_anonymous_args);
19690
19691 if (cfun->machine->lr_save_eliminated)
19692 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19693
19694 if (crtl->calls_eh_return)
19695 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19696
19697 }
19698
19699 static void
19700 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19701 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19702 {
19703 arm_stack_offsets *offsets;
19704
19705 if (TARGET_THUMB1)
19706 {
19707 int regno;
19708
19709 /* Emit any call-via-reg trampolines that are needed for v4t support
19710 of call_reg and call_value_reg type insns. */
19711 for (regno = 0; regno < LR_REGNUM; regno++)
19712 {
19713 rtx label = cfun->machine->call_via[regno];
19714
19715 if (label != NULL)
19716 {
19717 switch_to_section (function_section (current_function_decl));
19718 targetm.asm_out.internal_label (asm_out_file, "L",
19719 CODE_LABEL_NUMBER (label));
19720 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19721 }
19722 }
19723
19724 /* ??? Probably not safe to set this here, since it assumes that a
19725 function will be emitted as assembly immediately after we generate
19726 RTL for it. This does not happen for inline functions. */
19727 cfun->machine->return_used_this_function = 0;
19728 }
19729 else /* TARGET_32BIT */
19730 {
19731 /* We need to take into account any stack-frame rounding. */
19732 offsets = arm_get_frame_offsets ();
19733
19734 gcc_assert (!use_return_insn (FALSE, NULL)
19735 || (cfun->machine->return_used_this_function != 0)
19736 || offsets->saved_regs == offsets->outgoing_args
19737 || frame_pointer_needed);
19738 }
19739 }
19740
19741 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19742 STR and STRD. If an even number of registers are being pushed, an
19743 STRD pattern is created for each register pair. If an
19744 odd number of registers are pushed, emit an initial STR followed by
19745 as many STRD instructions as are needed. This works best when the
19746 stack is initially 64-bit aligned (the normal case), since it
19747 ensures that each STRD is also 64-bit aligned. */
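/* Hedged illustration of the sequence described above (registers are
   hypothetical): pushing {r4, r5, r6}, an odd count, produces roughly the
   equivalent of

       str   r4, [sp, #-12]!   @ single store allocates all 12 bytes
       strd  r5, r6, [sp, #4]  @ the following pair is 64-bit aligned
*/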
19748 static void
19749 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19750 {
19751 int num_regs = 0;
19752 int i;
19753 int regno;
19754 rtx par = NULL_RTX;
19755 rtx dwarf = NULL_RTX;
19756 rtx tmp;
19757 bool first = true;
19758
19759 num_regs = bit_count (saved_regs_mask);
19760
19761 /* Must be at least one register to save, and can't save SP or PC. */
19762 gcc_assert (num_regs > 0 && num_regs <= 14);
19763 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19764 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19765
19766 /* Create sequence for DWARF info. All the frame-related data for
19767 debugging is held in this wrapper. */
19768 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19769
19770 /* Describe the stack adjustment. */
19771 tmp = gen_rtx_SET (stack_pointer_rtx,
19772 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19773 RTX_FRAME_RELATED_P (tmp) = 1;
19774 XVECEXP (dwarf, 0, 0) = tmp;
19775
19776 /* Find the first register. */
19777 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19778 ;
19779
19780 i = 0;
19781
19782 /* If there's an odd number of registers to push, start off by
19783 pushing a single register. This ensures that subsequent strd
19784 operations are dword aligned (assuming that SP was originally
19785 64-bit aligned). */
19786 if ((num_regs & 1) != 0)
19787 {
19788 rtx reg, mem, insn;
19789
19790 reg = gen_rtx_REG (SImode, regno);
19791 if (num_regs == 1)
19792 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19793 stack_pointer_rtx));
19794 else
19795 mem = gen_frame_mem (Pmode,
19796 gen_rtx_PRE_MODIFY
19797 (Pmode, stack_pointer_rtx,
19798 plus_constant (Pmode, stack_pointer_rtx,
19799 -4 * num_regs)));
19800
19801 tmp = gen_rtx_SET (mem, reg);
19802 RTX_FRAME_RELATED_P (tmp) = 1;
19803 insn = emit_insn (tmp);
19804 RTX_FRAME_RELATED_P (insn) = 1;
19805 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19806 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19807 RTX_FRAME_RELATED_P (tmp) = 1;
19808 i++;
19809 regno++;
19810 XVECEXP (dwarf, 0, i) = tmp;
19811 first = false;
19812 }
19813
19814 while (i < num_regs)
19815 if (saved_regs_mask & (1 << regno))
19816 {
19817 rtx reg1, reg2, mem1, mem2;
19818 rtx tmp0, tmp1, tmp2;
19819 int regno2;
19820
19821 /* Find the register to pair with this one. */
19822 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19823 regno2++)
19824 ;
19825
19826 reg1 = gen_rtx_REG (SImode, regno);
19827 reg2 = gen_rtx_REG (SImode, regno2);
19828
19829 if (first)
19830 {
19831 rtx insn;
19832
19833 first = false;
19834 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19835 stack_pointer_rtx,
19836 -4 * num_regs));
19837 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19838 stack_pointer_rtx,
19839 -4 * (num_regs - 1)));
19840 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19841 plus_constant (Pmode, stack_pointer_rtx,
19842 -4 * (num_regs)));
19843 tmp1 = gen_rtx_SET (mem1, reg1);
19844 tmp2 = gen_rtx_SET (mem2, reg2);
19845 RTX_FRAME_RELATED_P (tmp0) = 1;
19846 RTX_FRAME_RELATED_P (tmp1) = 1;
19847 RTX_FRAME_RELATED_P (tmp2) = 1;
19848 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19849 XVECEXP (par, 0, 0) = tmp0;
19850 XVECEXP (par, 0, 1) = tmp1;
19851 XVECEXP (par, 0, 2) = tmp2;
19852 insn = emit_insn (par);
19853 RTX_FRAME_RELATED_P (insn) = 1;
19854 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19855 }
19856 else
19857 {
19858 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19859 stack_pointer_rtx,
19860 4 * i));
19861 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19862 stack_pointer_rtx,
19863 4 * (i + 1)));
19864 tmp1 = gen_rtx_SET (mem1, reg1);
19865 tmp2 = gen_rtx_SET (mem2, reg2);
19866 RTX_FRAME_RELATED_P (tmp1) = 1;
19867 RTX_FRAME_RELATED_P (tmp2) = 1;
19868 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19869 XVECEXP (par, 0, 0) = tmp1;
19870 XVECEXP (par, 0, 1) = tmp2;
19871 emit_insn (par);
19872 }
19873
19874 /* Create unwind information. This is an approximation. */
19875 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19876 plus_constant (Pmode,
19877 stack_pointer_rtx,
19878 4 * i)),
19879 reg1);
19880 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19881 plus_constant (Pmode,
19882 stack_pointer_rtx,
19883 4 * (i + 1))),
19884 reg2);
19885
19886 RTX_FRAME_RELATED_P (tmp1) = 1;
19887 RTX_FRAME_RELATED_P (tmp2) = 1;
19888 XVECEXP (dwarf, 0, i + 1) = tmp1;
19889 XVECEXP (dwarf, 0, i + 2) = tmp2;
19890 i += 2;
19891 regno = regno2 + 1;
19892 }
19893 else
19894 regno++;
19895
19896 return;
19897 }
19898
19899 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19900 whenever possible, otherwise it emits single-word stores. The first store
19901 also allocates stack space for all saved registers, using a writeback
19902 addressing mode. All other stores use offset addressing. If no STRD
19903 can be emitted, this function emits a sequence of single-word stores
19904 rather than an STM as before, because single-word stores give more
19905 scheduling freedom and can be turned into an STM by peephole optimizations. */
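/* For example (a sketch only, assuming a mask covering {r4, r5, r7}),
   the expected expansion is roughly:

       strd  r4, r5, [sp, #-12]!   @ first store allocates all the space
       str   r7, [sp, #8]          @ r7 has no consecutive partner  */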
19906 static void
19907 arm_emit_strd_push (unsigned long saved_regs_mask)
19908 {
19909 int num_regs = 0;
19910 int i, j, dwarf_index = 0;
19911 int offset = 0;
19912 rtx dwarf = NULL_RTX;
19913 rtx insn = NULL_RTX;
19914 rtx tmp, mem;
19915
19916 /* TODO: More efficient code could be emitted by changing the
19917 layout, e.g., first pushing all pairs that can use STRD to keep the
19918 stack aligned, and then pushing all other registers. */
19919 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19920 if (saved_regs_mask & (1 << i))
19921 num_regs++;
19922
19923 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19924 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19925 gcc_assert (num_regs > 0);
19926
19927 /* Create sequence for DWARF info. */
19928 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19929
19930 /* For dwarf info, we generate explicit stack update. */
19931 tmp = gen_rtx_SET (stack_pointer_rtx,
19932 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19933 RTX_FRAME_RELATED_P (tmp) = 1;
19934 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19935
19936 /* Save registers. */
19937 offset = - 4 * num_regs;
19938 j = 0;
19939 while (j <= LAST_ARM_REGNUM)
19940 if (saved_regs_mask & (1 << j))
19941 {
19942 if ((j % 2 == 0)
19943 && (saved_regs_mask & (1 << (j + 1))))
19944 {
19945 /* The current register and the next register form a register pair
19946 for which STRD can be generated. */
19947 if (offset < 0)
19948 {
19949 /* Allocate stack space for all saved registers. */
19950 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19951 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19952 mem = gen_frame_mem (DImode, tmp);
19953 offset = 0;
19954 }
19955 else if (offset > 0)
19956 mem = gen_frame_mem (DImode,
19957 plus_constant (Pmode,
19958 stack_pointer_rtx,
19959 offset));
19960 else
19961 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19962
19963 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19964 RTX_FRAME_RELATED_P (tmp) = 1;
19965 tmp = emit_insn (tmp);
19966
19967 /* Record the first store insn. */
19968 if (dwarf_index == 1)
19969 insn = tmp;
19970
19971 /* Generate dwarf info. */
19972 mem = gen_frame_mem (SImode,
19973 plus_constant (Pmode,
19974 stack_pointer_rtx,
19975 offset));
19976 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19977 RTX_FRAME_RELATED_P (tmp) = 1;
19978 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19979
19980 mem = gen_frame_mem (SImode,
19981 plus_constant (Pmode,
19982 stack_pointer_rtx,
19983 offset + 4));
19984 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19985 RTX_FRAME_RELATED_P (tmp) = 1;
19986 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19987
19988 offset += 8;
19989 j += 2;
19990 }
19991 else
19992 {
19993 /* Emit a single word store. */
19994 if (offset < 0)
19995 {
19996 /* Allocate stack space for all saved registers. */
19997 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19998 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19999 mem = gen_frame_mem (SImode, tmp);
20000 offset = 0;
20001 }
20002 else if (offset > 0)
20003 mem = gen_frame_mem (SImode,
20004 plus_constant (Pmode,
20005 stack_pointer_rtx,
20006 offset));
20007 else
20008 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20009
20010 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20011 RTX_FRAME_RELATED_P (tmp) = 1;
20012 tmp = emit_insn (tmp);
20013
20014 /* Record the first store insn. */
20015 if (dwarf_index == 1)
20016 insn = tmp;
20017
20018 /* Generate dwarf info. */
20019 mem = gen_frame_mem (SImode,
20020 plus_constant (Pmode,
20021 stack_pointer_rtx,
20022 offset));
20023 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20024 RTX_FRAME_RELATED_P (tmp) = 1;
20025 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20026
20027 offset += 4;
20028 j += 1;
20029 }
20030 }
20031 else
20032 j++;
20033
20034 /* Attach dwarf info to the first insn we generate. */
20035 gcc_assert (insn != NULL_RTX);
20036 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20037 RTX_FRAME_RELATED_P (insn) = 1;
20038 }
20039
20040 /* Generate and emit an insn that we will recognize as a push_multi.
20041 Unfortunately, since this insn does not reflect very well the actual
20042 semantics of the operation, we need to annotate the insn for the benefit
20043 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20044 MASK for registers that should be annotated for DWARF2 frame unwind
20045 information. */
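/* As a usage sketch (hypothetical caller, not taken verbatim from this
   file): saving r4, r5 and LR with full unwind annotation would look
   roughly like

       unsigned long mask = (1 << 4) | (1 << 5) | (1 << LR_REGNUM);
       emit_multi_reg_push (mask, mask);

   which is expected to assemble to a single "push {r4, r5, lr}" (or an
   equivalent STMFD).  */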
20046 static rtx
20047 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20048 {
20049 int num_regs = 0;
20050 int num_dwarf_regs = 0;
20051 int i, j;
20052 rtx par;
20053 rtx dwarf;
20054 int dwarf_par_index;
20055 rtx tmp, reg;
20056
20057 /* We don't record the PC in the dwarf frame information. */
20058 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20059
20060 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20061 {
20062 if (mask & (1 << i))
20063 num_regs++;
20064 if (dwarf_regs_mask & (1 << i))
20065 num_dwarf_regs++;
20066 }
20067
20068 gcc_assert (num_regs && num_regs <= 16);
20069 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20070
20071 /* For the body of the insn we are going to generate an UNSPEC in
20072 parallel with several USEs. This allows the insn to be recognized
20073 by the push_multi pattern in the arm.md file.
20074
20075 The body of the insn looks something like this:
20076
20077 (parallel [
20078 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20079 (const_int:SI <num>)))
20080 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20081 (use (reg:SI XX))
20082 (use (reg:SI YY))
20083 ...
20084 ])
20085
20086 For the frame note however, we try to be more explicit and actually
20087 show each register being stored into the stack frame, plus a (single)
20088 decrement of the stack pointer. We do it this way in order to be
20089 friendly to the stack unwinding code, which only wants to see a single
20090 stack decrement per instruction. The RTL we generate for the note looks
20091 something like this:
20092
20093 (sequence [
20094 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20095 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20096 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20097 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20098 ...
20099 ])
20100
20101 FIXME: In an ideal world the PRE_MODIFY would not exist and
20102 instead we'd have a parallel expression detailing all
20103 the stores to the various memory addresses so that debug
20104 information is more up-to-date. Remember however while writing
20105 this to take care of the constraints with the push instruction.
20106
20107 Note also that this has to be taken care of for the VFP registers.
20108
20109 For more see PR43399. */
20110
20111 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20112 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20113 dwarf_par_index = 1;
20114
20115 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20116 {
20117 if (mask & (1 << i))
20118 {
20119 reg = gen_rtx_REG (SImode, i);
20120
20121 XVECEXP (par, 0, 0)
20122 = gen_rtx_SET (gen_frame_mem
20123 (BLKmode,
20124 gen_rtx_PRE_MODIFY (Pmode,
20125 stack_pointer_rtx,
20126 plus_constant
20127 (Pmode, stack_pointer_rtx,
20128 -4 * num_regs))
20129 ),
20130 gen_rtx_UNSPEC (BLKmode,
20131 gen_rtvec (1, reg),
20132 UNSPEC_PUSH_MULT));
20133
20134 if (dwarf_regs_mask & (1 << i))
20135 {
20136 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20137 reg);
20138 RTX_FRAME_RELATED_P (tmp) = 1;
20139 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20140 }
20141
20142 break;
20143 }
20144 }
20145
20146 for (j = 1, i++; j < num_regs; i++)
20147 {
20148 if (mask & (1 << i))
20149 {
20150 reg = gen_rtx_REG (SImode, i);
20151
20152 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20153
20154 if (dwarf_regs_mask & (1 << i))
20155 {
20156 tmp
20157 = gen_rtx_SET (gen_frame_mem
20158 (SImode,
20159 plus_constant (Pmode, stack_pointer_rtx,
20160 4 * j)),
20161 reg);
20162 RTX_FRAME_RELATED_P (tmp) = 1;
20163 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20164 }
20165
20166 j++;
20167 }
20168 }
20169
20170 par = emit_insn (par);
20171
20172 tmp = gen_rtx_SET (stack_pointer_rtx,
20173 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20174 RTX_FRAME_RELATED_P (tmp) = 1;
20175 XVECEXP (dwarf, 0, 0) = tmp;
20176
20177 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20178
20179 return par;
20180 }
20181
20182 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20183 SIZE is the offset to be adjusted.
20184 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20185 static void
20186 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20187 {
20188 rtx dwarf;
20189
20190 RTX_FRAME_RELATED_P (insn) = 1;
20191 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20192 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20193 }
20194
20195 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20196 SAVED_REGS_MASK shows which registers need to be restored.
20197
20198 Unfortunately, since this insn does not reflect very well the actual
20199 semantics of the operation, we need to annotate the insn for the benefit
20200 of DWARF2 frame unwind information. */
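/* As an illustrative sketch, a SAVED_REGS_MASK covering {r4, r5, pc}
   would be expected to produce a single "pop {r4, r5, pc}" (or the
   equivalent LDM), annotated below for the unwinder.  */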
20201 static void
20202 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20203 {
20204 int num_regs = 0;
20205 int i, j;
20206 rtx par;
20207 rtx dwarf = NULL_RTX;
20208 rtx tmp, reg;
20209 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20210 int offset_adj;
20211 int emit_update;
20212
20213 offset_adj = return_in_pc ? 1 : 0;
20214 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20215 if (saved_regs_mask & (1 << i))
20216 num_regs++;
20217
20218 gcc_assert (num_regs && num_regs <= 16);
20219
20220 /* If SP is in the register list, then we don't emit an SP update insn. */
20221 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20222
20223 /* The parallel needs to hold num_regs SETs
20224 and one SET for the stack update. */
20225 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20226
20227 if (return_in_pc)
20228 XVECEXP (par, 0, 0) = ret_rtx;
20229
20230 if (emit_update)
20231 {
20232 /* Increment the stack pointer, based on there being
20233 num_regs 4-byte registers to restore. */
20234 tmp = gen_rtx_SET (stack_pointer_rtx,
20235 plus_constant (Pmode,
20236 stack_pointer_rtx,
20237 4 * num_regs));
20238 RTX_FRAME_RELATED_P (tmp) = 1;
20239 XVECEXP (par, 0, offset_adj) = tmp;
20240 }
20241
20242 /* Now restore every reg, which may include PC. */
20243 for (j = 0, i = 0; j < num_regs; i++)
20244 if (saved_regs_mask & (1 << i))
20245 {
20246 reg = gen_rtx_REG (SImode, i);
20247 if ((num_regs == 1) && emit_update && !return_in_pc)
20248 {
20249 /* Emit single load with writeback. */
20250 tmp = gen_frame_mem (SImode,
20251 gen_rtx_POST_INC (Pmode,
20252 stack_pointer_rtx));
20253 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20254 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20255 return;
20256 }
20257
20258 tmp = gen_rtx_SET (reg,
20259 gen_frame_mem
20260 (SImode,
20261 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20262 RTX_FRAME_RELATED_P (tmp) = 1;
20263 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20264
20265 /* We need to maintain a sequence for DWARF info too. As the DWARF
20266 info should not include the PC, skip the PC. */
20267 if (i != PC_REGNUM)
20268 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20269
20270 j++;
20271 }
20272
20273 if (return_in_pc)
20274 par = emit_jump_insn (par);
20275 else
20276 par = emit_insn (par);
20277
20278 REG_NOTES (par) = dwarf;
20279 if (!return_in_pc)
20280 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20281 stack_pointer_rtx, stack_pointer_rtx);
20282 }
20283
20284 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20285 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20286
20287 Unfortunately, since this insn does not reflect very well the actual
20288 semantics of the operation, we need to annotate the insn for the benefit
20289 of DWARF2 frame unwind information. */
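/* As an illustrative sketch (register numbering elided), restoring the
   eight call-saved double registers d8-d15 relative to SP would be
   expected to assemble to something like "vldmia sp!, {d8-d15}".  */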
20290 static void
20291 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20292 {
20293 int i, j;
20294 rtx par;
20295 rtx dwarf = NULL_RTX;
20296 rtx tmp, reg;
20297
20298 gcc_assert (num_regs && num_regs <= 32);
20299
20300 /* Workaround ARM10 VFPr1 bug. */
20301 if (num_regs == 2 && !arm_arch6)
20302 {
20303 if (first_reg == 15)
20304 first_reg--;
20305
20306 num_regs++;
20307 }
20308
20309 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20310 there could be up to 32 D-registers to restore.
20311 If there are more than 16 D-registers, make two recursive calls,
20312 each of which emits one pop_multi instruction. */
20313 if (num_regs > 16)
20314 {
20315 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20316 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20317 return;
20318 }
20319
20320 /* The parallel needs to hold num_regs SETs
20321 and one SET for the stack update. */
20322 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20323
20324 /* Increment the stack pointer, based on there being
20325 num_regs 8-byte registers to restore. */
20326 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20327 RTX_FRAME_RELATED_P (tmp) = 1;
20328 XVECEXP (par, 0, 0) = tmp;
20329
20330 /* Now show every reg that will be restored, using a SET for each. */
20331 for (j = 0, i = first_reg; j < num_regs; i += 2)
20332 {
20333 reg = gen_rtx_REG (DFmode, i);
20334
20335 tmp = gen_rtx_SET (reg,
20336 gen_frame_mem
20337 (DFmode,
20338 plus_constant (Pmode, base_reg, 8 * j)));
20339 RTX_FRAME_RELATED_P (tmp) = 1;
20340 XVECEXP (par, 0, j + 1) = tmp;
20341
20342 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20343
20344 j++;
20345 }
20346
20347 par = emit_insn (par);
20348 REG_NOTES (par) = dwarf;
20349
20350 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20351 if (REGNO (base_reg) == IP_REGNUM)
20352 {
20353 RTX_FRAME_RELATED_P (par) = 1;
20354 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20355 }
20356 else
20357 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20358 base_reg, base_reg);
20359 }
20360
20361 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20362 an even number of registers is being popped, multiple LDRD patterns are
20363 created for all the register pairs. If an odd number of registers is popped,
20364 the last register is loaded using an LDR pattern. */
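/* As an illustrative sketch, a SAVED_REGS_MASK covering {r4, r5, r6, r7}
   would be expected to expand to roughly:

       ldrd  r4, r5, [sp]
       ldrd  r6, r7, [sp, #8]
       add   sp, sp, #16  */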
20365 static void
20366 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20367 {
20368 int num_regs = 0;
20369 int i, j;
20370 rtx par = NULL_RTX;
20371 rtx dwarf = NULL_RTX;
20372 rtx tmp, reg, tmp1;
20373 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20374
20375 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20376 if (saved_regs_mask & (1 << i))
20377 num_regs++;
20378
20379 gcc_assert (num_regs && num_regs <= 16);
20380
20381 /* We cannot generate an LDRD for the PC. Hence, reduce the count if the
20382 PC is to be popped: if num_regs was even it now becomes odd and we can
20383 generate the pop with the PC, while if num_regs was odd it becomes even
20384 and an LDR with return can be generated for the PC. */
20385 if (return_in_pc)
20386 num_regs--;
20387
20388 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20389
20390 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
20391 the index of each saved register in the stack frame. A PARALLEL RTX
20392 holding a register pair is created here, so that the pattern for LDRD
20393 can be matched. As the PC is always the last register to be popped, and
20394 we have already decremented num_regs if the PC is present, we don't have
20395 to worry about the PC in this loop. */
20396 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20397 if (saved_regs_mask & (1 << j))
20398 {
20399 /* Create RTX for memory load. */
20400 reg = gen_rtx_REG (SImode, j);
20401 tmp = gen_rtx_SET (reg,
20402 gen_frame_mem (SImode,
20403 plus_constant (Pmode,
20404 stack_pointer_rtx, 4 * i)));
20405 RTX_FRAME_RELATED_P (tmp) = 1;
20406
20407 if (i % 2 == 0)
20408 {
20409 /* When saved-register index (i) is even, the RTX to be emitted is
20410 yet to be created. Hence create it first. The LDRD pattern we
20411 are generating is :
20412 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20413 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20414 where target registers need not be consecutive. */
20415 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20416 dwarf = NULL_RTX;
20417 }
20418
20419 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20420 added as the 0th element, and if i is odd, reg_i is added as the 1st
20421 element of the LDRD pattern shown above. */
20422 XVECEXP (par, 0, (i % 2)) = tmp;
20423 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20424
20425 if ((i % 2) == 1)
20426 {
20427 /* When saved-register index (i) is odd, RTXs for both the registers
20428 to be loaded are generated in above given LDRD pattern, and the
20429 pattern can be emitted now. */
20430 par = emit_insn (par);
20431 REG_NOTES (par) = dwarf;
20432 RTX_FRAME_RELATED_P (par) = 1;
20433 }
20434
20435 i++;
20436 }
20437
20438 /* If the number of registers popped is odd and return_in_pc is false, or
20439 the number of registers is even and return_in_pc is true, the last
20440 register is popped using LDR; it can be the PC as well. Hence, adjust the
20441 stack first and then use LDR with post-increment. */
20442
20443 /* Increment the stack pointer, based on there being
20444 num_regs 4-byte registers to restore. */
20445 tmp = gen_rtx_SET (stack_pointer_rtx,
20446 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20447 RTX_FRAME_RELATED_P (tmp) = 1;
20448 tmp = emit_insn (tmp);
20449 if (!return_in_pc)
20450 {
20451 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20452 stack_pointer_rtx, stack_pointer_rtx);
20453 }
20454
20455 dwarf = NULL_RTX;
20456
20457 if (((num_regs % 2) == 1 && !return_in_pc)
20458 || ((num_regs % 2) == 0 && return_in_pc))
20459 {
20460 /* Scan for the single register to be popped. Skip until the saved
20461 register is found. */
20462 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20463
20464 /* Gen LDR with post increment here. */
20465 tmp1 = gen_rtx_MEM (SImode,
20466 gen_rtx_POST_INC (SImode,
20467 stack_pointer_rtx));
20468 set_mem_alias_set (tmp1, get_frame_alias_set ());
20469
20470 reg = gen_rtx_REG (SImode, j);
20471 tmp = gen_rtx_SET (reg, tmp1);
20472 RTX_FRAME_RELATED_P (tmp) = 1;
20473 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20474
20475 if (return_in_pc)
20476 {
20477 /* If return_in_pc, j must be PC_REGNUM. */
20478 gcc_assert (j == PC_REGNUM);
20479 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20480 XVECEXP (par, 0, 0) = ret_rtx;
20481 XVECEXP (par, 0, 1) = tmp;
20482 par = emit_jump_insn (par);
20483 }
20484 else
20485 {
20486 par = emit_insn (tmp);
20487 REG_NOTES (par) = dwarf;
20488 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20489 stack_pointer_rtx, stack_pointer_rtx);
20490 }
20491
20492 }
20493 else if ((num_regs % 2) == 1 && return_in_pc)
20494 {
20495 /* There are 2 registers to be popped. So, generate the pattern
20496 pop_multiple_with_stack_update_and_return to pop in PC. */
20497 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20498 }
20499
20500 return;
20501 }
20502
20503 /* LDRD in ARM mode needs consecutive registers as operands. This function
20504 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20505 offset addressing and then generates one separate stack update. This provides
20506 more scheduling freedom, compared to writeback on every load. However,
20507 if the function returns using load into PC directly
20508 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20509 before the last load. TODO: Add a peephole optimization to recognize
20510 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20511 peephole optimization to merge the load at stack-offset zero
20512 with the stack update instruction using load with writeback
20513 in post-index addressing mode. */
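/* As an illustrative sketch, a SAVED_REGS_MASK covering {r4, r5, r6, pc}
   would be expected to expand to roughly:

       ldrd  r4, r5, [sp]
       ldr   r6, [sp, #8]
       add   sp, sp, #12
       ldr   pc, [sp], #4    @ stack updated before the final load  */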
20514 static void
20515 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20516 {
20517 int j = 0;
20518 int offset = 0;
20519 rtx par = NULL_RTX;
20520 rtx dwarf = NULL_RTX;
20521 rtx tmp, mem;
20522
20523 /* Restore saved registers. */
20524 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20525 j = 0;
20526 while (j <= LAST_ARM_REGNUM)
20527 if (saved_regs_mask & (1 << j))
20528 {
20529 if ((j % 2) == 0
20530 && (saved_regs_mask & (1 << (j + 1)))
20531 && (j + 1) != PC_REGNUM)
20532 {
20533 /* The current register and the next register form a register pair for which
20534 LDRD can be generated. PC is always the last register popped, and
20535 we handle it separately. */
20536 if (offset > 0)
20537 mem = gen_frame_mem (DImode,
20538 plus_constant (Pmode,
20539 stack_pointer_rtx,
20540 offset));
20541 else
20542 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20543
20544 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20545 tmp = emit_insn (tmp);
20546 RTX_FRAME_RELATED_P (tmp) = 1;
20547
20548 /* Generate dwarf info. */
20549
20550 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20551 gen_rtx_REG (SImode, j),
20552 NULL_RTX);
20553 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20554 gen_rtx_REG (SImode, j + 1),
20555 dwarf);
20556
20557 REG_NOTES (tmp) = dwarf;
20558
20559 offset += 8;
20560 j += 2;
20561 }
20562 else if (j != PC_REGNUM)
20563 {
20564 /* Emit a single word load. */
20565 if (offset > 0)
20566 mem = gen_frame_mem (SImode,
20567 plus_constant (Pmode,
20568 stack_pointer_rtx,
20569 offset));
20570 else
20571 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20572
20573 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20574 tmp = emit_insn (tmp);
20575 RTX_FRAME_RELATED_P (tmp) = 1;
20576
20577 /* Generate dwarf info. */
20578 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20579 gen_rtx_REG (SImode, j),
20580 NULL_RTX);
20581
20582 offset += 4;
20583 j += 1;
20584 }
20585 else /* j == PC_REGNUM */
20586 j++;
20587 }
20588 else
20589 j++;
20590
20591 /* Update the stack. */
20592 if (offset > 0)
20593 {
20594 tmp = gen_rtx_SET (stack_pointer_rtx,
20595 plus_constant (Pmode,
20596 stack_pointer_rtx,
20597 offset));
20598 tmp = emit_insn (tmp);
20599 arm_add_cfa_adjust_cfa_note (tmp, offset,
20600 stack_pointer_rtx, stack_pointer_rtx);
20601 offset = 0;
20602 }
20603
20604 if (saved_regs_mask & (1 << PC_REGNUM))
20605 {
20606 /* Only PC is to be popped. */
20607 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20608 XVECEXP (par, 0, 0) = ret_rtx;
20609 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20610 gen_frame_mem (SImode,
20611 gen_rtx_POST_INC (SImode,
20612 stack_pointer_rtx)));
20613 RTX_FRAME_RELATED_P (tmp) = 1;
20614 XVECEXP (par, 0, 1) = tmp;
20615 par = emit_jump_insn (par);
20616
20617 /* Generate dwarf info. */
20618 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20619 gen_rtx_REG (SImode, PC_REGNUM),
20620 NULL_RTX);
20621 REG_NOTES (par) = dwarf;
20622 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20623 stack_pointer_rtx, stack_pointer_rtx);
20624 }
20625 }
20626
20627 /* Calculate the size of the return value that is passed in registers. */
20628 static unsigned
20629 arm_size_return_regs (void)
20630 {
20631 machine_mode mode;
20632
20633 if (crtl->return_rtx != 0)
20634 mode = GET_MODE (crtl->return_rtx);
20635 else
20636 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20637
20638 return GET_MODE_SIZE (mode);
20639 }
20640
20641 /* Return true if the current function needs to save/restore LR. */
20642 static bool
20643 thumb_force_lr_save (void)
20644 {
20645 return !cfun->machine->lr_save_eliminated
20646 && (!crtl->is_leaf
20647 || thumb_far_jump_used_p ()
20648 || df_regs_ever_live_p (LR_REGNUM));
20649 }
20650
20651 /* We do not know whether r3 will be available, because
20652 there is an indirect tail call in this
20653 particular case. */
20654 static bool
20655 is_indirect_tailcall_p (rtx call)
20656 {
20657 rtx pat = PATTERN (call);
20658
20659 /* Indirect tail call. */
20660 pat = XVECEXP (pat, 0, 0);
20661 if (GET_CODE (pat) == SET)
20662 pat = SET_SRC (pat);
20663
20664 pat = XEXP (XEXP (pat, 0), 0);
20665 return REG_P (pat);
20666 }
20667
20668 /* Return true if r3 is used by any of the tail call insns in the
20669 current function. */
20670 static bool
20671 any_sibcall_could_use_r3 (void)
20672 {
20673 edge_iterator ei;
20674 edge e;
20675
20676 if (!crtl->tail_call_emit)
20677 return false;
20678 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20679 if (e->flags & EDGE_SIBCALL)
20680 {
20681 rtx_insn *call = BB_END (e->src);
20682 if (!CALL_P (call))
20683 call = prev_nonnote_nondebug_insn (call);
20684 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20685 if (find_regno_fusage (call, USE, 3)
20686 || is_indirect_tailcall_p (call))
20687 return true;
20688 }
20689 return false;
20690 }
20691
20692
20693 /* Compute the distance from register FROM to register TO.
20694 These can be the arg pointer (26), the soft frame pointer (25),
20695 the stack pointer (13) or the hard frame pointer (11).
20696 In thumb mode r7 is used as the soft frame pointer, if needed.
20697 Typical stack layout looks like this:
20698
20699 old stack pointer -> | |
20700 ----
20701 | | \
20702 | | saved arguments for
20703 | | vararg functions
20704 | | /
20705 --
20706 hard FP & arg pointer -> | | \
20707 | | stack
20708 | | frame
20709 | | /
20710 --
20711 | | \
20712 | | call saved
20713 | | registers
20714 soft frame pointer -> | | /
20715 --
20716 | | \
20717 | | local
20718 | | variables
20719 locals base pointer -> | | /
20720 --
20721 | | \
20722 | | outgoing
20723 | | arguments
20724 current stack pointer -> | | /
20725 --
20726
20727 For a given function some or all of these stack components
20728 may not be needed, giving rise to the possibility of
20729 eliminating some of the registers.
20730
20731 The values returned by this function must reflect the behavior
20732 of arm_expand_prologue() and arm_compute_save_reg_mask().
20733
20734 The sign of the number returned reflects the direction of stack
20735 growth, so the values are positive for all eliminations except
20736 from the soft frame pointer to the hard frame pointer.
20737
20738 SFP may point just inside the local variables block to ensure correct
20739 alignment. */
20740
20741
20742 /* Calculate stack offsets. These are used to calculate register elimination
20743 offsets and in prologue/epilogue code. Also calculates which registers
20744 should be saved. */
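/* As an illustrative sketch (assuming an ARM-state function with no
   pretend args, no static chain, no frame pointer, no outgoing
   arguments and no caller-interworking slot) that saves {r4, lr} and
   has 8 bytes of locals, the fields would be expected to come out
   roughly as saved_args = 0, frame = 0, saved_regs = 8, soft_frame = 8,
   locals_base = 16 and outgoing_args = 16.  */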
20745
20746 static arm_stack_offsets *
20747 arm_get_frame_offsets (void)
20748 {
20749 struct arm_stack_offsets *offsets;
20750 unsigned long func_type;
20751 int saved;
20752 int core_saved;
20753 HOST_WIDE_INT frame_size;
20754 int i;
20755
20756 offsets = &cfun->machine->stack_offsets;
20757
20758 if (reload_completed)
20759 return offsets;
20760
20761 /* Initially this is the size of the local variables. It will be translated
20762 into an offset once we have determined the size of the preceding data. */
20763 frame_size = ROUND_UP_WORD (get_frame_size ());
20764
20765 /* Space for variadic functions. */
20766 offsets->saved_args = crtl->args.pretend_args_size;
20767
20768 /* In Thumb mode this is incorrect, but never used. */
20769 offsets->frame
20770 = (offsets->saved_args
20771 + arm_compute_static_chain_stack_bytes ()
20772 + (frame_pointer_needed ? 4 : 0));
20773
20774 if (TARGET_32BIT)
20775 {
20776 unsigned int regno;
20777
20778 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20779 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20780 saved = core_saved;
20781
20782 /* We know that SP will be doubleword aligned on entry, and we must
20783 preserve that condition at any subroutine call. We also require the
20784 soft frame pointer to be doubleword aligned. */
20785
20786 if (TARGET_REALLY_IWMMXT)
20787 {
20788 /* Check for the call-saved iWMMXt registers. */
20789 for (regno = FIRST_IWMMXT_REGNUM;
20790 regno <= LAST_IWMMXT_REGNUM;
20791 regno++)
20792 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20793 saved += 8;
20794 }
20795
20796 func_type = arm_current_func_type ();
20797 /* Space for saved VFP registers. */
20798 if (! IS_VOLATILE (func_type)
20799 && TARGET_HARD_FLOAT)
20800 saved += arm_get_vfp_saved_size ();
20801 }
20802 else /* TARGET_THUMB1 */
20803 {
20804 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20805 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20806 saved = core_saved;
20807 if (TARGET_BACKTRACE)
20808 saved += 16;
20809 }
20810
20811 /* Saved registers include the stack frame. */
20812 offsets->saved_regs
20813 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20814 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20815
20816 /* A leaf function does not need any stack alignment if it has nothing
20817 on the stack. */
20818 if (crtl->is_leaf && frame_size == 0
20819 /* However if it calls alloca(), we have a dynamically allocated
20820 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20821 && ! cfun->calls_alloca)
20822 {
20823 offsets->outgoing_args = offsets->soft_frame;
20824 offsets->locals_base = offsets->soft_frame;
20825 return offsets;
20826 }
20827
20828 /* Ensure SFP has the correct alignment. */
20829 if (ARM_DOUBLEWORD_ALIGN
20830 && (offsets->soft_frame & 7))
20831 {
20832 offsets->soft_frame += 4;
20833 /* Try to align stack by pushing an extra reg. Don't bother doing this
20834 when there is a stack frame as the alignment will be rolled into
20835 the normal stack adjustment. */
20836 if (frame_size + crtl->outgoing_args_size == 0)
20837 {
20838 int reg = -1;
20839
20840 /* Register r3 is caller-saved. Normally it does not need to be
20841 saved on entry by the prologue. However if we choose to save
20842 it for padding then we may confuse the compiler into thinking
20843 a prologue sequence is required when in fact it is not. This
20844 will occur when shrink-wrapping if r3 is used as a scratch
20845 register and there are no other callee-saved writes.
20846
20847 This situation can be avoided when other callee-saved registers
20848 are available: r3 is not mandatory, so we can choose a callee-saved
20849 register for padding instead. */
20850 bool prefer_callee_reg_p = false;
20851
20852 /* If it is safe to use r3, then do so. This sometimes
20853 generates better code on Thumb-2 by avoiding the need to
20854 use 32-bit push/pop instructions. */
20855 if (! any_sibcall_could_use_r3 ()
20856 && arm_size_return_regs () <= 12
20857 && (offsets->saved_regs_mask & (1 << 3)) == 0
20858 && (TARGET_THUMB2
20859 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20860 {
20861 reg = 3;
20862 if (!TARGET_THUMB2)
20863 prefer_callee_reg_p = true;
20864 }
20865 if (reg == -1
20866 || prefer_callee_reg_p)
20867 {
20868 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20869 {
20870 /* Avoid fixed registers; they may be changed at
20871 arbitrary times so it's unsafe to restore them
20872 during the epilogue. */
20873 if (!fixed_regs[i]
20874 && (offsets->saved_regs_mask & (1 << i)) == 0)
20875 {
20876 reg = i;
20877 break;
20878 }
20879 }
20880 }
20881
20882 if (reg != -1)
20883 {
20884 offsets->saved_regs += 4;
20885 offsets->saved_regs_mask |= (1 << reg);
20886 }
20887 }
20888 }
20889
20890 offsets->locals_base = offsets->soft_frame + frame_size;
20891 offsets->outgoing_args = (offsets->locals_base
20892 + crtl->outgoing_args_size);
20893
20894 if (ARM_DOUBLEWORD_ALIGN)
20895 {
20896 /* Ensure SP remains doubleword aligned. */
20897 if (offsets->outgoing_args & 7)
20898 offsets->outgoing_args += 4;
20899 gcc_assert (!(offsets->outgoing_args & 7));
20900 }
20901
20902 return offsets;
20903 }
20904
20905
20906 /* Calculate the relative offsets for the different stack pointers. Positive
20907 offsets are in the direction of stack growth. */
20908
20909 HOST_WIDE_INT
20910 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20911 {
20912 arm_stack_offsets *offsets;
20913
20914 offsets = arm_get_frame_offsets ();
20915
20916 /* OK, now we have enough information to compute the distances.
20917 There must be an entry in these switch tables for each pair
20918 of registers in ELIMINABLE_REGS, even if some of the entries
20919 seem to be redundant or useless. */
20920 switch (from)
20921 {
20922 case ARG_POINTER_REGNUM:
20923 switch (to)
20924 {
20925 case THUMB_HARD_FRAME_POINTER_REGNUM:
20926 return 0;
20927
20928 case FRAME_POINTER_REGNUM:
20929 /* This is the reverse of the soft frame pointer
20930 to hard frame pointer elimination below. */
20931 return offsets->soft_frame - offsets->saved_args;
20932
20933 case ARM_HARD_FRAME_POINTER_REGNUM:
20934 /* This is only non-zero in the case where the static chain register
20935 is stored above the frame. */
20936 return offsets->frame - offsets->saved_args - 4;
20937
20938 case STACK_POINTER_REGNUM:
20939 /* If nothing has been pushed on the stack at all
20940 then this will return -4. This *is* correct! */
20941 return offsets->outgoing_args - (offsets->saved_args + 4);
20942
20943 default:
20944 gcc_unreachable ();
20945 }
20946 gcc_unreachable ();
20947
20948 case FRAME_POINTER_REGNUM:
20949 switch (to)
20950 {
20951 case THUMB_HARD_FRAME_POINTER_REGNUM:
20952 return 0;
20953
20954 case ARM_HARD_FRAME_POINTER_REGNUM:
20955 /* The hard frame pointer points to the top entry in the
20956 stack frame. The soft frame pointer to the bottom entry
20957 in the stack frame. If there is no stack frame at all,
20958 then they are identical. */
20959
20960 return offsets->frame - offsets->soft_frame;
20961
20962 case STACK_POINTER_REGNUM:
20963 return offsets->outgoing_args - offsets->soft_frame;
20964
20965 default:
20966 gcc_unreachable ();
20967 }
20968 gcc_unreachable ();
20969
20970 default:
20971 /* You cannot eliminate from the stack pointer.
20972 In theory you could eliminate from the hard frame
20973 pointer to the stack pointer, but this will never
20974 happen, since if a stack frame is not needed the
20975 hard frame pointer will never be used. */
20976 gcc_unreachable ();
20977 }
20978 }
20979
20980 /* Given FROM and TO register numbers, say whether this elimination is
20981 allowed. Frame pointer elimination is automatically handled.
20982
20983 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20984 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20985 pointer, we must eliminate FRAME_POINTER_REGNUM into
20986 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20987 ARG_POINTER_REGNUM. */
20988
20989 bool
20990 arm_can_eliminate (const int from, const int to)
20991 {
20992 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20993 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20994 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20995 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20996 true);
20997 }
20998
20999 /* Emit RTL to save coprocessor registers on function entry. Returns the
21000 number of bytes pushed. */
21001
21002 static int
21003 arm_save_coproc_regs (void)
21004 {
21005 int saved_size = 0;
21006 unsigned reg;
21007 unsigned start_reg;
21008 rtx insn;
21009
21010 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21011 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21012 {
21013 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21014 insn = gen_rtx_MEM (V2SImode, insn);
21015 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21016 RTX_FRAME_RELATED_P (insn) = 1;
21017 saved_size += 8;
21018 }
21019
21020 if (TARGET_HARD_FLOAT)
21021 {
21022 start_reg = FIRST_VFP_REGNUM;
21023
21024 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21025 {
21026 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21027 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21028 {
21029 if (start_reg != reg)
21030 saved_size += vfp_emit_fstmd (start_reg,
21031 (reg - start_reg) / 2);
21032 start_reg = reg + 2;
21033 }
21034 }
21035 if (start_reg != reg)
21036 saved_size += vfp_emit_fstmd (start_reg,
21037 (reg - start_reg) / 2);
21038 }
21039 return saved_size;
21040 }
21041
21042
21043 /* Set the Thumb frame pointer from the stack pointer. */
21044
21045 static void
21046 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21047 {
21048 HOST_WIDE_INT amount;
21049 rtx insn, dwarf;
21050
21051 amount = offsets->outgoing_args - offsets->locals_base;
21052 if (amount < 1024)
21053 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21054 stack_pointer_rtx, GEN_INT (amount)));
21055 else
21056 {
21057 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21058 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21059 expects the first two operands to be the same. */
21060 if (TARGET_THUMB2)
21061 {
21062 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21063 stack_pointer_rtx,
21064 hard_frame_pointer_rtx));
21065 }
21066 else
21067 {
21068 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21069 hard_frame_pointer_rtx,
21070 stack_pointer_rtx));
21071 }
21072 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21073 plus_constant (Pmode, stack_pointer_rtx, amount));
21074 RTX_FRAME_RELATED_P (dwarf) = 1;
21075 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21076 }
21077
21078 RTX_FRAME_RELATED_P (insn) = 1;
21079 }
21080
21081 struct scratch_reg {
21082 rtx reg;
21083 bool saved;
21084 };
21085
21086 /* Return a short-lived scratch register for use as a 2nd scratch register on
21087 function entry after the registers are saved in the prologue. This register
21088 must be released by means of release_scratch_register_on_entry. IP is not
21089 considered since it is always used as the 1st scratch register if available.
21090
21091 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21092 mask of live registers. */
21093
21094 static void
21095 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21096 unsigned long live_regs)
21097 {
21098 int regno = -1;
21099
21100 sr->saved = false;
21101
21102 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21103 regno = LR_REGNUM;
21104 else
21105 {
21106 unsigned int i;
21107
21108 for (i = 4; i < 11; i++)
21109 if (regno1 != i && (live_regs & (1 << i)) != 0)
21110 {
21111 regno = i;
21112 break;
21113 }
21114
21115 if (regno < 0)
21116 {
21117 /* If IP is used as the 1st scratch register for a nested function,
21118 then either r3 wasn't available or it is used to preserve IP. */
21119 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21120 regno1 = 3;
21121 regno = (regno1 == 3 ? 2 : 3);
21122 sr->saved
21123 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21124 regno);
21125 }
21126 }
21127
21128 sr->reg = gen_rtx_REG (SImode, regno);
21129 if (sr->saved)
21130 {
21131 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21132 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21133 rtx x = gen_rtx_SET (stack_pointer_rtx,
21134 plus_constant (Pmode, stack_pointer_rtx, -4));
21135 RTX_FRAME_RELATED_P (insn) = 1;
21136 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21137 }
21138 }
21139
21140 /* Release a scratch register obtained from the preceding function. */
21141
21142 static void
21143 release_scratch_register_on_entry (struct scratch_reg *sr)
21144 {
21145 if (sr->saved)
21146 {
21147 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21148 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21149 rtx x = gen_rtx_SET (stack_pointer_rtx,
21150 plus_constant (Pmode, stack_pointer_rtx, 4));
21151 RTX_FRAME_RELATED_P (insn) = 1;
21152 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21153 }
21154 }
21155
21156 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21157
21158 #if PROBE_INTERVAL > 4096
21159 #error Cannot use indexed addressing mode for stack probing
21160 #endif
21161
21162 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21163 inclusive. These are offsets from the current stack pointer. REGNO1
21164 is the index number of the 1st scratch register and LIVE_REGS is the
21165 mask of live registers. */
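/* As an illustrative sketch (assuming the default 4 KB PROBE_INTERVAL),
   a call with FIRST == 4096 and SIZE == 8192 would be expected to emit
   probes at sp - 8192 and sp - 12288, i.e. one probe per interval below
   the protection area, ending at sp - (FIRST + SIZE).  */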
21166
21167 static void
21168 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21169 unsigned int regno1, unsigned long live_regs)
21170 {
21171 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21172
21173 /* See if we have a constant small number of probes to generate. If so,
21174 that's the easy case. */
21175 if (size <= PROBE_INTERVAL)
21176 {
21177 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21178 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21179 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21180 }
21181
21182 /* The run-time loop is made up of 10 insns in the generic case while the
21183 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21184 else if (size <= 5 * PROBE_INTERVAL)
21185 {
21186 HOST_WIDE_INT i, rem;
21187
21188 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21189 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21190 emit_stack_probe (reg1);
21191
21192 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21193 it exceeds SIZE. If only two probes are needed, this will not
21194 generate any code. Then probe at FIRST + SIZE. */
21195 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21196 {
21197 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21198 emit_stack_probe (reg1);
21199 }
21200
21201 rem = size - (i - PROBE_INTERVAL);
21202 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21203 {
21204 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21205 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21206 }
21207 else
21208 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21209 }
21210
21211 /* Otherwise, do the same as above, but in a loop. Note that we must be
21212 extra careful with variables wrapping around because we might be at
21213 the very top (or the very bottom) of the address space and we have
21214 to be able to handle this case properly; in particular, we use an
21215 equality test for the loop condition. */
21216 else
21217 {
21218 HOST_WIDE_INT rounded_size;
21219 struct scratch_reg sr;
21220
21221 get_scratch_register_on_entry (&sr, regno1, live_regs);
21222
21223 emit_move_insn (reg1, GEN_INT (first));
21224
21225
21226 /* Step 1: round SIZE to the previous multiple of the interval. */
21227
21228 rounded_size = size & -PROBE_INTERVAL;
21229 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21230
21231
21232 /* Step 2: compute initial and final value of the loop counter. */
21233
21234 /* TEST_ADDR = SP + FIRST. */
21235 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21236
21237 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21238 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21239
21240
21241 /* Step 3: the loop
21242
21243 do
21244 {
21245 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21246 probe at TEST_ADDR
21247 }
21248 while (TEST_ADDR != LAST_ADDR)
21249
21250 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21251 until it is equal to ROUNDED_SIZE. */
21252
21253 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21254
21255
21256 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21257 that SIZE is equal to ROUNDED_SIZE. */
21258
21259 if (size != rounded_size)
21260 {
21261 HOST_WIDE_INT rem = size - rounded_size;
21262
21263 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21264 {
21265 emit_set_insn (sr.reg,
21266 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21267 emit_stack_probe (plus_constant (Pmode, sr.reg,
21268 PROBE_INTERVAL - rem));
21269 }
21270 else
21271 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21272 }
21273
21274 release_scratch_register_on_entry (&sr);
21275 }
21276
21277 /* Make sure nothing is scheduled before we are done. */
21278 emit_insn (gen_blockage ());
21279 }
21280
21281 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21282 absolute addresses. */
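/* As an illustrative sketch (assuming the default 4 KB probe interval),
   the emitted loop looks roughly like:

       .LPSRL0:
           sub   reg1, reg1, #4096
           str   r0, [reg1, #0]
           cmp   reg1, reg2
           bne   .LPSRL0  */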
21283
21284 const char *
21285 output_probe_stack_range (rtx reg1, rtx reg2)
21286 {
21287 static int labelno = 0;
21288 char loop_lab[32];
21289 rtx xops[2];
21290
21291 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21292
21293 /* Loop. */
21294 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21295
21296 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21297 xops[0] = reg1;
21298 xops[1] = GEN_INT (PROBE_INTERVAL);
21299 output_asm_insn ("sub\t%0, %0, %1", xops);
21300
21301 /* Probe at TEST_ADDR. */
21302 output_asm_insn ("str\tr0, [%0, #0]", xops);
21303
21304 /* Test if TEST_ADDR == LAST_ADDR. */
21305 xops[1] = reg2;
21306 output_asm_insn ("cmp\t%0, %1", xops);
21307
21308 /* Branch. */
21309 fputs ("\tbne\t", asm_out_file);
21310 assemble_name_raw (asm_out_file, loop_lab);
21311 fputc ('\n', asm_out_file);
21312
21313 return "";
21314 }
21315
21316 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21317 function. */
21318 void
21319 arm_expand_prologue (void)
21320 {
21321 rtx amount;
21322 rtx insn;
21323 rtx ip_rtx;
21324 unsigned long live_regs_mask;
21325 unsigned long func_type;
21326 int fp_offset = 0;
21327 int saved_pretend_args = 0;
21328 int saved_regs = 0;
21329 unsigned HOST_WIDE_INT args_to_push;
21330 HOST_WIDE_INT size;
21331 arm_stack_offsets *offsets;
21332 bool clobber_ip;
21333
21334 func_type = arm_current_func_type ();
21335
21336 /* Naked functions don't have prologues. */
21337 if (IS_NAKED (func_type))
21338 {
21339 if (flag_stack_usage_info)
21340 current_function_static_stack_size = 0;
21341 return;
21342 }
21343
21344 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21345 args_to_push = crtl->args.pretend_args_size;
21346
21347 /* Compute which registers we will have to save onto the stack. */
21348 offsets = arm_get_frame_offsets ();
21349 live_regs_mask = offsets->saved_regs_mask;
21350
21351 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21352
21353 if (IS_STACKALIGN (func_type))
21354 {
21355 rtx r0, r1;
21356
21357 /* Handle a word-aligned stack pointer. We generate the following:
21358
21359 mov r0, sp
21360 bic r1, r0, #7
21361 mov sp, r1
21362 <save and restore r0 in normal prologue/epilogue>
21363 mov sp, r0
21364 bx lr
21365
21366 The unwinder doesn't need to know about the stack realignment.
21367 Just tell it we saved SP in r0. */
21368 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21369
21370 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21371 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21372
21373 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21374 RTX_FRAME_RELATED_P (insn) = 1;
21375 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21376
21377 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21378
21379 /* ??? The CFA changes here, which may cause GDB to conclude that it
21380 has entered a different function. That said, the unwind info is
21381 correct, individually, before and after this instruction because
21382 we've described the save of SP, which will override the default
21383 handling of SP as restoring from the CFA. */
21384 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21385 }
21386
21387 /* The static chain register is the same as the IP register. If it is
21388 clobbered when creating the frame, we need to save and restore it. */
21389 clobber_ip = IS_NESTED (func_type)
21390 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21391 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21392 && !df_regs_ever_live_p (LR_REGNUM)
21393 && arm_r3_live_at_start_p ()));
21394
21395 /* Find somewhere to store IP whilst the frame is being created.
21396 We try the following places in order:
21397
21398 1. The last argument register r3 if it is available.
21399 2. A slot on the stack above the frame if there are no
21400 arguments to push onto the stack.
21401 3. Register r3 again, after pushing the argument registers
21402 onto the stack, if this is a varargs function.
21403 4. The last slot on the stack created for the arguments to
21404 push, if this isn't a varargs function.
21405
21406 Note - we only need to tell the dwarf2 backend about the SP
21407 adjustment in the second variant; the static chain register
21408 doesn't need to be unwound, as it doesn't contain a value
21409 inherited from the caller. */
21410 if (clobber_ip)
21411 {
21412 if (!arm_r3_live_at_start_p ())
21413 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21414 else if (args_to_push == 0)
21415 {
21416 rtx addr, dwarf;
21417
21418 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21419 saved_regs += 4;
21420
21421 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21422 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21423 fp_offset = 4;
21424
21425 /* Just tell the dwarf backend that we adjusted SP. */
21426 dwarf = gen_rtx_SET (stack_pointer_rtx,
21427 plus_constant (Pmode, stack_pointer_rtx,
21428 -fp_offset));
21429 RTX_FRAME_RELATED_P (insn) = 1;
21430 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21431 }
21432 else
21433 {
21434 /* Store the args on the stack. */
21435 if (cfun->machine->uses_anonymous_args)
21436 {
21437 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21438 (0xf0 >> (args_to_push / 4)) & 0xf);
21439 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21440 saved_pretend_args = 1;
21441 }
21442 else
21443 {
21444 rtx addr, dwarf;
21445
21446 if (args_to_push == 4)
21447 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21448 else
21449 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21450 plus_constant (Pmode,
21451 stack_pointer_rtx,
21452 -args_to_push));
21453
21454 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21455
21456 /* Just tell the dwarf backend that we adjusted SP. */
21457 dwarf = gen_rtx_SET (stack_pointer_rtx,
21458 plus_constant (Pmode, stack_pointer_rtx,
21459 -args_to_push));
21460 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21461 }
21462
21463 RTX_FRAME_RELATED_P (insn) = 1;
21464 fp_offset = args_to_push;
21465 args_to_push = 0;
21466 }
21467 }
21468
21469 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21470 {
21471 if (IS_INTERRUPT (func_type))
21472 {
21473 /* Interrupt functions must not corrupt any registers.
21474 Creating a frame pointer however, corrupts the IP
21475 register, so we must push it first. */
21476 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21477
21478 /* Do not set RTX_FRAME_RELATED_P on this insn.
21479 The dwarf stack unwinding code only wants to see one
21480 stack decrement per function, and this is not it. If
21481 this instruction is labeled as being part of the frame
21482 creation sequence then dwarf2out_frame_debug_expr will
21483 die when it encounters the assignment of IP to FP
21484 later on, since the use of SP here establishes SP as
21485 the CFA register and not IP.
21486
21487 Anyway this instruction is not really part of the stack
21488 frame creation although it is part of the prologue. */
21489 }
21490
21491 insn = emit_set_insn (ip_rtx,
21492 plus_constant (Pmode, stack_pointer_rtx,
21493 fp_offset));
21494 RTX_FRAME_RELATED_P (insn) = 1;
21495 }
21496
21497 if (args_to_push)
21498 {
21499 /* Push the argument registers, or reserve space for them. */
21500 if (cfun->machine->uses_anonymous_args)
21501 insn = emit_multi_reg_push
21502 ((0xf0 >> (args_to_push / 4)) & 0xf,
21503 (0xf0 >> (args_to_push / 4)) & 0xf);
21504 else
21505 insn = emit_insn
21506 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21507 GEN_INT (- args_to_push)));
21508 RTX_FRAME_RELATED_P (insn) = 1;
21509 }
21510
21511 /* If this is an interrupt service routine, and the link register
21512 is going to be pushed, and we're not generating an extra
21513 push of IP (needed when a frame is needed and the frame layout is APCS),
21514 subtracting four from LR now will mean that the function return
21515 can be done with a single instruction. */
21516 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21517 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21518 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21519 && TARGET_ARM)
21520 {
21521 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21522
21523 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21524 }
21525
21526 if (live_regs_mask)
21527 {
21528 unsigned long dwarf_regs_mask = live_regs_mask;
21529
21530 saved_regs += bit_count (live_regs_mask) * 4;
21531 if (optimize_size && !frame_pointer_needed
21532 && saved_regs == offsets->saved_regs - offsets->saved_args)
21533 {
21534 /* If no coprocessor registers are being pushed and we don't have
21535 to worry about a frame pointer then push extra registers to
21536 create the stack frame. This is done in a way that does not
21537 alter the frame layout, so is independent of the epilogue. */
21538 int n;
21539 int frame;
21540 n = 0;
21541 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21542 n++;
21543 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21544 if (frame && n * 4 >= frame)
21545 {
21546 n = frame / 4;
21547 live_regs_mask |= (1 << n) - 1;
21548 saved_regs += frame;
21549 }
21550 }
21551
21552 if (TARGET_LDRD
21553 && current_tune->prefer_ldrd_strd
21554 && !optimize_function_for_size_p (cfun))
21555 {
21556 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21557 if (TARGET_THUMB2)
21558 thumb2_emit_strd_push (live_regs_mask);
21559 else if (TARGET_ARM
21560 && !TARGET_APCS_FRAME
21561 && !IS_INTERRUPT (func_type))
21562 arm_emit_strd_push (live_regs_mask);
21563 else
21564 {
21565 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21566 RTX_FRAME_RELATED_P (insn) = 1;
21567 }
21568 }
21569 else
21570 {
21571 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21572 RTX_FRAME_RELATED_P (insn) = 1;
21573 }
21574 }
21575
21576 if (! IS_VOLATILE (func_type))
21577 saved_regs += arm_save_coproc_regs ();
21578
21579 if (frame_pointer_needed && TARGET_ARM)
21580 {
21581 /* Create the new frame pointer. */
21582 if (TARGET_APCS_FRAME)
21583 {
21584 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21585 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21586 RTX_FRAME_RELATED_P (insn) = 1;
21587 }
21588 else
21589 {
21590 insn = GEN_INT (saved_regs - (4 + fp_offset));
21591 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21592 stack_pointer_rtx, insn));
21593 RTX_FRAME_RELATED_P (insn) = 1;
21594 }
21595 }
21596
21597 size = offsets->outgoing_args - offsets->saved_args;
21598 if (flag_stack_usage_info)
21599 current_function_static_stack_size = size;
21600
21601 /* If this isn't an interrupt service routine and we have a frame, then do
21602 stack checking. We use IP as the first scratch register, except for the
21603 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21604 if (!IS_INTERRUPT (func_type)
21605 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21606 {
21607 unsigned int regno;
21608
21609 if (!IS_NESTED (func_type) || clobber_ip)
21610 regno = IP_REGNUM;
21611 else if (df_regs_ever_live_p (LR_REGNUM))
21612 regno = LR_REGNUM;
21613 else
21614 regno = 3;
21615
21616 if (crtl->is_leaf && !cfun->calls_alloca)
21617 {
21618 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21619 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21620 size - STACK_CHECK_PROTECT,
21621 regno, live_regs_mask);
21622 }
21623 else if (size > 0)
21624 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21625 regno, live_regs_mask);
21626 }
21627
21628 /* Recover the static chain register. */
21629 if (clobber_ip)
21630 {
21631 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21632 insn = gen_rtx_REG (SImode, 3);
21633 else
21634 {
21635 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21636 insn = gen_frame_mem (SImode, insn);
21637 }
21638 emit_set_insn (ip_rtx, insn);
21639 emit_insn (gen_force_register_use (ip_rtx));
21640 }
21641
21642 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21643 {
21644 /* This add can produce multiple insns for a large constant, so we
21645 need to get tricky. */
21646 rtx_insn *last = get_last_insn ();
21647
21648 amount = GEN_INT (offsets->saved_args + saved_regs
21649 - offsets->outgoing_args);
21650
21651 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21652 amount));
21653 do
21654 {
21655 last = last ? NEXT_INSN (last) : get_insns ();
21656 RTX_FRAME_RELATED_P (last) = 1;
21657 }
21658 while (last != insn);
21659
21660 /* If the frame pointer is needed, emit a special barrier that
21661 will prevent the scheduler from moving stores to the frame
21662 before the stack adjustment. */
21663 if (frame_pointer_needed)
21664 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21665 hard_frame_pointer_rtx));
21666 }
21667
21668
21669 if (frame_pointer_needed && TARGET_THUMB2)
21670 thumb_set_frame_pointer (offsets);
21671
21672 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21673 {
21674 unsigned long mask;
21675
21676 mask = live_regs_mask;
21677 mask &= THUMB2_WORK_REGS;
21678 if (!IS_NESTED (func_type))
21679 mask |= (1 << IP_REGNUM);
21680 arm_load_pic_register (mask);
21681 }
21682
21683 /* If we are profiling, make sure no instructions are scheduled before
21684 the call to mcount. Similarly if the user has requested no
21685 scheduling in the prolog. Similarly if we want non-call exceptions
21686 using the EABI unwinder, to prevent faulting instructions from being
21687 swapped with a stack adjustment. */
21688 if (crtl->profile || !TARGET_SCHED_PROLOG
21689 || (arm_except_unwind_info (&global_options) == UI_TARGET
21690 && cfun->can_throw_non_call_exceptions))
21691 emit_insn (gen_blockage ());
21692
21693 /* If the link register is being kept alive, with the return address in it,
21694 then make sure that it does not get reused by the ce2 pass. */
21695 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21696 cfun->machine->lr_save_eliminated = 1;
21697 }
21698 \f
21699 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21700 static void
21701 arm_print_condition (FILE *stream)
21702 {
21703 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21704 {
21705 /* Branch conversion is not implemented for Thumb-2. */
21706 if (TARGET_THUMB)
21707 {
21708 output_operand_lossage ("predicated Thumb instruction");
21709 return;
21710 }
21711 if (current_insn_predicate != NULL)
21712 {
21713 output_operand_lossage
21714 ("predicated instruction in conditional sequence");
21715 return;
21716 }
21717
21718 fputs (arm_condition_codes[arm_current_cc], stream);
21719 }
21720 else if (current_insn_predicate)
21721 {
21722 enum arm_cond_code code;
21723
21724 if (TARGET_THUMB1)
21725 {
21726 output_operand_lossage ("predicated Thumb instruction");
21727 return;
21728 }
21729
21730 code = get_arm_condition_code (current_insn_predicate);
21731 fputs (arm_condition_codes[code], stream);
21732 }
21733 }
21734
21735
21736 /* Globally reserved letters: acln
21737 Punctuation letters currently used: @_|?().!#
21738 Lower case letters currently used: bcdefhimpqtvwxyz
21739 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21740 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21741
21742 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21743
21744 If CODE is 'd', then X is a condition operand and the instruction
21745 should only be executed if the condition is true.
21746 If CODE is 'D', then X is a condition operand and the instruction
21747 should only be executed if the condition is false: however, if the mode
21748 of the comparison is CCFPEmode, then always execute the instruction -- we
21749 do this because in these circumstances !GE does not necessarily imply LT;
21750 in these cases the instruction pattern will take care to make sure that
21751 an instruction containing %d will follow, thereby undoing the effects of
21752 doing this instruction unconditionally.
21753 If CODE is 'N' then X is a floating point operand that must be negated
21754 before output.
21755 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21756 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
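/* These codes are used with '%' in output templates. For example
   (illustrative only), "%B0" prints the bitwise inverse of a CONST_INT
   operand 0, and "%M0" prints a register range such as "{r4-r7}" suitable
   for ldm/stm.  */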
21757 static void
21758 arm_print_operand (FILE *stream, rtx x, int code)
21759 {
21760 switch (code)
21761 {
21762 case '@':
21763 fputs (ASM_COMMENT_START, stream);
21764 return;
21765
21766 case '_':
21767 fputs (user_label_prefix, stream);
21768 return;
21769
21770 case '|':
21771 fputs (REGISTER_PREFIX, stream);
21772 return;
21773
21774 case '?':
21775 arm_print_condition (stream);
21776 return;
21777
21778 case '.':
21779 /* The current condition code for a condition code setting instruction.
21780 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21781 fputc ('s', stream);
21782 arm_print_condition (stream);
21783 return;
21784
21785 case '!':
21786 /* If the instruction is conditionally executed then print
21787 the current condition code, otherwise print 's'. */
21788 gcc_assert (TARGET_THUMB2);
21789 if (current_insn_predicate)
21790 arm_print_condition (stream);
21791 else
21792 fputc ('s', stream);
21793 break;
21794
21795 /* %# is a "break" sequence. It doesn't output anything, but is used to
21796 separate e.g. operand numbers from following text, if that text consists
21797 of further digits which we don't want to be part of the operand
21798 number. */
21799 case '#':
21800 return;
21801
21802 case 'N':
21803 {
21804 REAL_VALUE_TYPE r;
21805 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21806 fprintf (stream, "%s", fp_const_from_val (&r));
21807 }
21808 return;
21809
21810 /* An integer or symbol address without a preceding # sign. */
21811 case 'c':
21812 switch (GET_CODE (x))
21813 {
21814 case CONST_INT:
21815 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21816 break;
21817
21818 case SYMBOL_REF:
21819 output_addr_const (stream, x);
21820 break;
21821
21822 case CONST:
21823 if (GET_CODE (XEXP (x, 0)) == PLUS
21824 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21825 {
21826 output_addr_const (stream, x);
21827 break;
21828 }
21829 /* Fall through. */
21830
21831 default:
21832 output_operand_lossage ("Unsupported operand for code '%c'", code);
21833 }
21834 return;
21835
21836 /* An integer that we want to print in HEX. */
21837 case 'x':
21838 switch (GET_CODE (x))
21839 {
21840 case CONST_INT:
21841 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21842 break;
21843
21844 default:
21845 output_operand_lossage ("Unsupported operand for code '%c'", code);
21846 }
21847 return;
21848
21849 case 'B':
21850 if (CONST_INT_P (x))
21851 {
21852 HOST_WIDE_INT val;
21853 val = ARM_SIGN_EXTEND (~INTVAL (x));
21854 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21855 }
21856 else
21857 {
21858 putc ('~', stream);
21859 output_addr_const (stream, x);
21860 }
21861 return;
21862
21863 case 'b':
21864 /* Print the log2 of a CONST_INT. */
21865 {
21866 HOST_WIDE_INT val;
21867
21868 if (!CONST_INT_P (x)
21869 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21870 output_operand_lossage ("Unsupported operand for code '%c'", code);
21871 else
21872 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21873 }
21874 return;
21875
21876 case 'L':
21877 /* The low 16 bits of an immediate constant. */
21878 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21879 return;
21880
21881 case 'i':
21882 fprintf (stream, "%s", arithmetic_instr (x, 1));
21883 return;
21884
21885 case 'I':
21886 fprintf (stream, "%s", arithmetic_instr (x, 0));
21887 return;
21888
21889 case 'S':
21890 {
21891 HOST_WIDE_INT val;
21892 const char *shift;
21893
21894 shift = shift_op (x, &val);
21895
21896 if (shift)
21897 {
21898 fprintf (stream, ", %s ", shift);
21899 if (val == -1)
21900 arm_print_operand (stream, XEXP (x, 1), 0);
21901 else
21902 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21903 }
21904 }
21905 return;
21906
21907 /* An explanation of the 'Q', 'R' and 'H' register operands:
21908
21909 In a pair of registers containing a DI or DF value the 'Q'
21910 operand returns the register number of the register containing
21911 the least significant part of the value. The 'R' operand returns
21912 the register number of the register containing the most
21913 significant part of the value.
21914
21915 The 'H' operand returns the higher of the two register numbers.
21916 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21917 same as the 'Q' operand, since the most significant part of the
21918 value is held in the lower-numbered register. The reverse is true
21919 on systems where WORDS_BIG_ENDIAN is false.
21920
21921 The purpose of these operands is to distinguish between cases
21922 where the endian-ness of the values is important (for example
21923 when they are added together), and cases where the endian-ness
21924 is irrelevant, but the order of register operations is important.
21925 For example when loading a value from memory into a register
21926 pair, the endian-ness does not matter. Provided that the value
21927 from the lower memory address is put into the lower numbered
21928 register, and the value from the higher address is put into the
21929 higher numbered register, the load will work regardless of whether
21930 the value being loaded is big-wordian or little-wordian. The
21931 order of the two register loads can matter however, if the address
21932 of the memory location is actually held in one of the registers
21933 being overwritten by the load.
21934
21935 The 'Q' and 'R' constraints are also available for 64-bit
21936 constants. */
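    /* Concretely, on a little-endian target (WORDS_BIG_ENDIAN false) with a
       DImode value in {r4, r5}: '%Q' prints r4 (the least significant word),
       '%R' prints r5 (the most significant word) and '%H' also prints r5,
       the higher-numbered register.  */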
21937 case 'Q':
21938 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21939 {
21940 rtx part = gen_lowpart (SImode, x);
21941 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21942 return;
21943 }
21944
21945 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21946 {
21947 output_operand_lossage ("invalid operand for code '%c'", code);
21948 return;
21949 }
21950
21951 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21952 return;
21953
21954 case 'R':
21955 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21956 {
21957 machine_mode mode = GET_MODE (x);
21958 rtx part;
21959
21960 if (mode == VOIDmode)
21961 mode = DImode;
21962 part = gen_highpart_mode (SImode, mode, x);
21963 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21964 return;
21965 }
21966
21967 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21968 {
21969 output_operand_lossage ("invalid operand for code '%c'", code);
21970 return;
21971 }
21972
21973 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21974 return;
21975
21976 case 'H':
21977 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21978 {
21979 output_operand_lossage ("invalid operand for code '%c'", code);
21980 return;
21981 }
21982
21983 asm_fprintf (stream, "%r", REGNO (x) + 1);
21984 return;
21985
21986 case 'J':
21987 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21988 {
21989 output_operand_lossage ("invalid operand for code '%c'", code);
21990 return;
21991 }
21992
21993 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21994 return;
21995
21996 case 'K':
21997 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21998 {
21999 output_operand_lossage ("invalid operand for code '%c'", code);
22000 return;
22001 }
22002
22003 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22004 return;
22005
22006 case 'm':
22007 asm_fprintf (stream, "%r",
22008 REG_P (XEXP (x, 0))
22009 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22010 return;
22011
22012 case 'M':
22013 asm_fprintf (stream, "{%r-%r}",
22014 REGNO (x),
22015 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22016 return;
22017
22018 /* Like 'M', but writing doubleword vector registers, for use by Neon
22019 insns. */
22020 case 'h':
22021 {
22022 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22023 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22024 if (numregs == 1)
22025 asm_fprintf (stream, "{d%d}", regno);
22026 else
22027 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22028 }
22029 return;
22030
22031 case 'd':
22032 /* CONST_TRUE_RTX means always -- that's the default. */
22033 if (x == const_true_rtx)
22034 return;
22035
22036 if (!COMPARISON_P (x))
22037 {
22038 output_operand_lossage ("invalid operand for code '%c'", code);
22039 return;
22040 }
22041
22042 fputs (arm_condition_codes[get_arm_condition_code (x)],
22043 stream);
22044 return;
22045
22046 case 'D':
22047 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22048 want to do that. */
22049 if (x == const_true_rtx)
22050 {
22051 output_operand_lossage ("instruction never executed");
22052 return;
22053 }
22054 if (!COMPARISON_P (x))
22055 {
22056 output_operand_lossage ("invalid operand for code '%c'", code);
22057 return;
22058 }
22059
22060 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22061 (get_arm_condition_code (x))],
22062 stream);
22063 return;
22064
22065 case 's':
22066 case 'V':
22067 case 'W':
22068 case 'X':
22069 case 'Y':
22070 case 'Z':
22071 /* Former Maverick support, removed after GCC-4.7. */
22072 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22073 return;
22074
22075 case 'U':
22076 if (!REG_P (x)
22077 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22078 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22079 /* Bad value for wCG register number. */
22080 {
22081 output_operand_lossage ("invalid operand for code '%c'", code);
22082 return;
22083 }
22084
22085 else
22086 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22087 return;
22088
22089 /* Print an iWMMXt control register name. */
22090 case 'w':
22091 if (!CONST_INT_P (x)
22092 || INTVAL (x) < 0
22093 || INTVAL (x) >= 16)
22094 /* Bad value for wC register number. */
22095 {
22096 output_operand_lossage ("invalid operand for code '%c'", code);
22097 return;
22098 }
22099
22100 else
22101 {
22102 static const char * wc_reg_names [16] =
22103 {
22104 "wCID", "wCon", "wCSSF", "wCASF",
22105 "wC4", "wC5", "wC6", "wC7",
22106 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22107 "wC12", "wC13", "wC14", "wC15"
22108 };
22109
22110 fputs (wc_reg_names [INTVAL (x)], stream);
22111 }
22112 return;
22113
22114 /* Print the high single-precision register of a VFP double-precision
22115 register. */
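    /* For example, given d2 this prints "s5", the odd single-precision
       register of the pair overlapping d2.  */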
22116 case 'p':
22117 {
22118 machine_mode mode = GET_MODE (x);
22119 int regno;
22120
22121 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22122 {
22123 output_operand_lossage ("invalid operand for code '%c'", code);
22124 return;
22125 }
22126
22127 regno = REGNO (x);
22128 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22129 {
22130 output_operand_lossage ("invalid operand for code '%c'", code);
22131 return;
22132 }
22133
22134 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22135 }
22136 return;
22137
22138 /* Print a VFP/Neon double precision or quad precision register name. */
22139 case 'P':
22140 case 'q':
22141 {
22142 machine_mode mode = GET_MODE (x);
22143 int is_quad = (code == 'q');
22144 int regno;
22145
22146 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22147 {
22148 output_operand_lossage ("invalid operand for code '%c'", code);
22149 return;
22150 }
22151
22152 if (!REG_P (x)
22153 || !IS_VFP_REGNUM (REGNO (x)))
22154 {
22155 output_operand_lossage ("invalid operand for code '%c'", code);
22156 return;
22157 }
22158
22159 regno = REGNO (x);
22160 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22161 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22162 {
22163 output_operand_lossage ("invalid operand for code '%c'", code);
22164 return;
22165 }
22166
22167 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22168 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22169 }
22170 return;
22171
22172 /* These two codes print the low/high doubleword register of a Neon quad
22173 register, respectively. For pair-structure types, can also print
22174 low/high quadword registers. */
22175 case 'e':
22176 case 'f':
22177 {
22178 machine_mode mode = GET_MODE (x);
22179 int regno;
22180
22181 if ((GET_MODE_SIZE (mode) != 16
22182 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22183 {
22184 output_operand_lossage ("invalid operand for code '%c'", code);
22185 return;
22186 }
22187
22188 regno = REGNO (x);
22189 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22190 {
22191 output_operand_lossage ("invalid operand for code '%c'", code);
22192 return;
22193 }
22194
22195 if (GET_MODE_SIZE (mode) == 16)
22196 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22197 + (code == 'f' ? 1 : 0));
22198 else
22199 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22200 + (code == 'f' ? 1 : 0));
22201 }
22202 return;
22203
22204 /* Print a VFPv3 floating-point constant, represented as an integer
22205 index. */
22206 case 'G':
22207 {
22208 int index = vfp3_const_double_index (x);
22209 gcc_assert (index != -1);
22210 fprintf (stream, "%d", index);
22211 }
22212 return;
22213
22214 /* Print bits representing opcode features for Neon.
22215
22216 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22217 and polynomials as unsigned.
22218
22219 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22220
22221 Bit 2 is 1 for rounding functions, 0 otherwise. */
22222
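    /* For example (illustrative): a bits value of 3 (signed | float) makes
       '%T' and '%F' both print 'f', while a value of 5 (signed | rounding)
       makes '%T' print 's' and '%O' print "r".  */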
22223 /* Identify the type as 's', 'u', 'p' or 'f'. */
22224 case 'T':
22225 {
22226 HOST_WIDE_INT bits = INTVAL (x);
22227 fputc ("uspf"[bits & 3], stream);
22228 }
22229 return;
22230
22231 /* Likewise, but signed and unsigned integers are both 'i'. */
22232 case 'F':
22233 {
22234 HOST_WIDE_INT bits = INTVAL (x);
22235 fputc ("iipf"[bits & 3], stream);
22236 }
22237 return;
22238
22239 /* As for 'T', but emit 'u' instead of 'p'. */
22240 case 't':
22241 {
22242 HOST_WIDE_INT bits = INTVAL (x);
22243 fputc ("usuf"[bits & 3], stream);
22244 }
22245 return;
22246
22247 /* Bit 2: rounding (vs none). */
22248 case 'O':
22249 {
22250 HOST_WIDE_INT bits = INTVAL (x);
22251 fputs ((bits & 4) != 0 ? "r" : "", stream);
22252 }
22253 return;
22254
22255 /* Memory operand for vld1/vst1 instruction. */
22256 case 'A':
22257 {
22258 rtx addr;
22259 bool postinc = FALSE;
22260 rtx postinc_reg = NULL;
22261 unsigned align, memsize, align_bits;
22262
22263 gcc_assert (MEM_P (x));
22264 addr = XEXP (x, 0);
22265 if (GET_CODE (addr) == POST_INC)
22266 {
22267 postinc = 1;
22268 addr = XEXP (addr, 0);
22269 }
22270 if (GET_CODE (addr) == POST_MODIFY)
22271 {
22272 postinc_reg = XEXP (XEXP (addr, 1), 1);
22273 addr = XEXP (addr, 0);
22274 }
22275 asm_fprintf (stream, "[%r", REGNO (addr));
22276
22277 /* We know the alignment of this access, so we can emit a hint in the
22278 instruction (for some alignments) as an aid to the memory subsystem
22279 of the target. */
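	/* For example (illustrative), a 16-byte access known to be 16-byte
	   aligned gets a ":128" hint, so the operand is printed as
	   "[r0:128]".  */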
22280 align = MEM_ALIGN (x) >> 3;
22281 memsize = MEM_SIZE (x);
22282
22283 /* Only certain alignment specifiers are supported by the hardware. */
22284 if (memsize == 32 && (align % 32) == 0)
22285 align_bits = 256;
22286 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22287 align_bits = 128;
22288 else if (memsize >= 8 && (align % 8) == 0)
22289 align_bits = 64;
22290 else
22291 align_bits = 0;
22292
22293 if (align_bits != 0)
22294 asm_fprintf (stream, ":%d", align_bits);
22295
22296 asm_fprintf (stream, "]");
22297
22298 if (postinc)
22299 fputs("!", stream);
22300 if (postinc_reg)
22301 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22302 }
22303 return;
22304
22305 case 'C':
22306 {
22307 rtx addr;
22308
22309 gcc_assert (MEM_P (x));
22310 addr = XEXP (x, 0);
22311 gcc_assert (REG_P (addr));
22312 asm_fprintf (stream, "[%r]", REGNO (addr));
22313 }
22314 return;
22315
22316 /* Translate an S register number into a D register number and element index. */
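    /* For example, s5 maps to "d2[1]": s-register 5 is the upper half of
       d2.  */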
22317 case 'y':
22318 {
22319 machine_mode mode = GET_MODE (x);
22320 int regno;
22321
22322 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22323 {
22324 output_operand_lossage ("invalid operand for code '%c'", code);
22325 return;
22326 }
22327
22328 regno = REGNO (x);
22329 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22330 {
22331 output_operand_lossage ("invalid operand for code '%c'", code);
22332 return;
22333 }
22334
22335 regno = regno - FIRST_VFP_REGNUM;
22336 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22337 }
22338 return;
22339
22340 case 'v':
22341 gcc_assert (CONST_DOUBLE_P (x));
22342 int result;
22343 result = vfp3_const_double_for_fract_bits (x);
22344 if (result == 0)
22345 result = vfp3_const_double_for_bits (x);
22346 fprintf (stream, "#%d", result);
22347 return;
22348
22349 /* Register specifier for vld1.16/vst1.16. Translate the S register
22350 number into a D register number and element index. */
22351 case 'z':
22352 {
22353 machine_mode mode = GET_MODE (x);
22354 int regno;
22355
22356 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22357 {
22358 output_operand_lossage ("invalid operand for code '%c'", code);
22359 return;
22360 }
22361
22362 regno = REGNO (x);
22363 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22364 {
22365 output_operand_lossage ("invalid operand for code '%c'", code);
22366 return;
22367 }
22368
22369 regno = regno - FIRST_VFP_REGNUM;
22370 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22371 }
22372 return;
22373
22374 default:
22375 if (x == 0)
22376 {
22377 output_operand_lossage ("missing operand");
22378 return;
22379 }
22380
22381 switch (GET_CODE (x))
22382 {
22383 case REG:
22384 asm_fprintf (stream, "%r", REGNO (x));
22385 break;
22386
22387 case MEM:
22388 output_address (GET_MODE (x), XEXP (x, 0));
22389 break;
22390
22391 case CONST_DOUBLE:
22392 {
22393 char fpstr[20];
22394 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22395 sizeof (fpstr), 0, 1);
22396 fprintf (stream, "#%s", fpstr);
22397 }
22398 break;
22399
22400 default:
22401 gcc_assert (GET_CODE (x) != NEG);
22402 fputc ('#', stream);
22403 if (GET_CODE (x) == HIGH)
22404 {
22405 fputs (":lower16:", stream);
22406 x = XEXP (x, 0);
22407 }
22408
22409 output_addr_const (stream, x);
22410 break;
22411 }
22412 }
22413 }
22414 \f
22415 /* Target hook for printing a memory address. */
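/* Representative 32-bit outputs (illustrative): "[r3]", "[r3, #8]",
   "[r3, -r4]", "[r3, #4]!" for a pre-increment access and "[r3], #4" for a
   post-increment access.  */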
22416 static void
22417 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22418 {
22419 if (TARGET_32BIT)
22420 {
22421 int is_minus = GET_CODE (x) == MINUS;
22422
22423 if (REG_P (x))
22424 asm_fprintf (stream, "[%r]", REGNO (x));
22425 else if (GET_CODE (x) == PLUS || is_minus)
22426 {
22427 rtx base = XEXP (x, 0);
22428 rtx index = XEXP (x, 1);
22429 HOST_WIDE_INT offset = 0;
22430 if (!REG_P (base)
22431 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22432 {
22433 /* Ensure that BASE is a register. */
22434 /* (one of them must be). */
22435 /* Also ensure that SP is not used as an index register. */
22436 std::swap (base, index);
22437 }
22438 switch (GET_CODE (index))
22439 {
22440 case CONST_INT:
22441 offset = INTVAL (index);
22442 if (is_minus)
22443 offset = -offset;
22444 asm_fprintf (stream, "[%r, #%wd]",
22445 REGNO (base), offset);
22446 break;
22447
22448 case REG:
22449 asm_fprintf (stream, "[%r, %s%r]",
22450 REGNO (base), is_minus ? "-" : "",
22451 REGNO (index));
22452 break;
22453
22454 case MULT:
22455 case ASHIFTRT:
22456 case LSHIFTRT:
22457 case ASHIFT:
22458 case ROTATERT:
22459 {
22460 asm_fprintf (stream, "[%r, %s%r",
22461 REGNO (base), is_minus ? "-" : "",
22462 REGNO (XEXP (index, 0)));
22463 arm_print_operand (stream, index, 'S');
22464 fputs ("]", stream);
22465 break;
22466 }
22467
22468 default:
22469 gcc_unreachable ();
22470 }
22471 }
22472 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22473 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22474 {
22475 gcc_assert (REG_P (XEXP (x, 0)));
22476
22477 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22478 asm_fprintf (stream, "[%r, #%s%d]!",
22479 REGNO (XEXP (x, 0)),
22480 GET_CODE (x) == PRE_DEC ? "-" : "",
22481 GET_MODE_SIZE (mode));
22482 else
22483 asm_fprintf (stream, "[%r], #%s%d",
22484 REGNO (XEXP (x, 0)),
22485 GET_CODE (x) == POST_DEC ? "-" : "",
22486 GET_MODE_SIZE (mode));
22487 }
22488 else if (GET_CODE (x) == PRE_MODIFY)
22489 {
22490 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22491 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22492 asm_fprintf (stream, "#%wd]!",
22493 INTVAL (XEXP (XEXP (x, 1), 1)));
22494 else
22495 asm_fprintf (stream, "%r]!",
22496 REGNO (XEXP (XEXP (x, 1), 1)));
22497 }
22498 else if (GET_CODE (x) == POST_MODIFY)
22499 {
22500 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22501 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22502 asm_fprintf (stream, "#%wd",
22503 INTVAL (XEXP (XEXP (x, 1), 1)));
22504 else
22505 asm_fprintf (stream, "%r",
22506 REGNO (XEXP (XEXP (x, 1), 1)));
22507 }
22508 else output_addr_const (stream, x);
22509 }
22510 else
22511 {
22512 if (REG_P (x))
22513 asm_fprintf (stream, "[%r]", REGNO (x));
22514 else if (GET_CODE (x) == POST_INC)
22515 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22516 else if (GET_CODE (x) == PLUS)
22517 {
22518 gcc_assert (REG_P (XEXP (x, 0)));
22519 if (CONST_INT_P (XEXP (x, 1)))
22520 asm_fprintf (stream, "[%r, #%wd]",
22521 REGNO (XEXP (x, 0)),
22522 INTVAL (XEXP (x, 1)));
22523 else
22524 asm_fprintf (stream, "[%r, %r]",
22525 REGNO (XEXP (x, 0)),
22526 REGNO (XEXP (x, 1)));
22527 }
22528 else
22529 output_addr_const (stream, x);
22530 }
22531 }
22532 \f
22533 /* Target hook for indicating whether a punctuation character for
22534 TARGET_PRINT_OPERAND is valid. */
22535 static bool
22536 arm_print_operand_punct_valid_p (unsigned char code)
22537 {
22538 return (code == '@' || code == '|' || code == '.'
22539 || code == '(' || code == ')' || code == '#'
22540 || (TARGET_32BIT && (code == '?'))
22541 || (TARGET_THUMB2 && (code == '!'))
22542 || (TARGET_THUMB && (code == '_')));
22543 }
22544 \f
22545 /* Target hook for assembling integer objects. The ARM version needs to
22546 handle word-sized values specially. */
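/* For a word-sized SYMBOL_REF emitted while building a PIC constant table
   this produces, e.g. (illustrative), "\t.word\tfoo(GOTOFF)" for a symbol
   that is text-relative and locally resolvable, or "\t.word\tfoo(GOT)"
   otherwise.  */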
22547 static bool
22548 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22549 {
22550 machine_mode mode;
22551
22552 if (size == UNITS_PER_WORD && aligned_p)
22553 {
22554 fputs ("\t.word\t", asm_out_file);
22555 output_addr_const (asm_out_file, x);
22556
22557 /* Mark symbols as position independent. We only do this in the
22558 .text segment, not in the .data segment. */
22559 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22560 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22561 {
22562 /* See legitimize_pic_address for an explanation of the
22563 TARGET_VXWORKS_RTP check. */
22564 /* References to weak symbols cannot be resolved locally:
22565 they may be overridden by a non-weak definition at link
22566 time. */
22567 if (!arm_pic_data_is_text_relative
22568 || (GET_CODE (x) == SYMBOL_REF
22569 && (!SYMBOL_REF_LOCAL_P (x)
22570 || (SYMBOL_REF_DECL (x)
22571 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22572 fputs ("(GOT)", asm_out_file);
22573 else
22574 fputs ("(GOTOFF)", asm_out_file);
22575 }
22576 fputc ('\n', asm_out_file);
22577 return true;
22578 }
22579
22580 mode = GET_MODE (x);
22581
22582 if (arm_vector_mode_supported_p (mode))
22583 {
22584 int i, units;
22585
22586 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22587
22588 units = CONST_VECTOR_NUNITS (x);
22589 size = GET_MODE_UNIT_SIZE (mode);
22590
22591 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22592 for (i = 0; i < units; i++)
22593 {
22594 rtx elt = CONST_VECTOR_ELT (x, i);
22595 assemble_integer
22596 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22597 }
22598 else
22599 for (i = 0; i < units; i++)
22600 {
22601 rtx elt = CONST_VECTOR_ELT (x, i);
22602 assemble_real
22603 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22604 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22605 }
22606
22607 return true;
22608 }
22609
22610 return default_assemble_integer (x, size, aligned_p);
22611 }
22612
22613 static void
22614 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22615 {
22616 section *s;
22617
22618 if (!TARGET_AAPCS_BASED)
22619 {
22620 (is_ctor ?
22621 default_named_section_asm_out_constructor
22622 : default_named_section_asm_out_destructor) (symbol, priority);
22623 return;
22624 }
22625
22626 /* Put these in the .init_array section, using a special relocation. */
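  /* For example, priority 101 yields a section named ".init_array.00101"
     (or ".fini_array.00101" for a destructor).  */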
22627 if (priority != DEFAULT_INIT_PRIORITY)
22628 {
22629 char buf[18];
22630 sprintf (buf, "%s.%.5u",
22631 is_ctor ? ".init_array" : ".fini_array",
22632 priority);
22633 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22634 }
22635 else if (is_ctor)
22636 s = ctors_section;
22637 else
22638 s = dtors_section;
22639
22640 switch_to_section (s);
22641 assemble_align (POINTER_SIZE);
22642 fputs ("\t.word\t", asm_out_file);
22643 output_addr_const (asm_out_file, symbol);
22644 fputs ("(target1)\n", asm_out_file);
22645 }
22646
22647 /* Add a function to the list of static constructors. */
22648
22649 static void
22650 arm_elf_asm_constructor (rtx symbol, int priority)
22651 {
22652 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22653 }
22654
22655 /* Add a function to the list of static destructors. */
22656
22657 static void
22658 arm_elf_asm_destructor (rtx symbol, int priority)
22659 {
22660 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22661 }
22662 \f
22663 /* A finite state machine takes care of noticing whether or not instructions
22664 can be conditionally executed, and thus decrease execution time and code
22665 size by deleting branch instructions. The fsm is controlled by
22666 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22667
22668 /* The states of the fsm controlling condition codes are:
22669 0: normal, do nothing special
22670 1: make ASM_OUTPUT_OPCODE not output this instruction
22671 2: make ASM_OUTPUT_OPCODE not output this instruction
22672 3: make instructions conditional
22673 4: make instructions conditional
22674
22675 State transitions (state->state by whom under condition):
22676 0 -> 1 final_prescan_insn if the `target' is a label
22677 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22678 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22679 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22680 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22681 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22682 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22683 (the target insn is arm_target_insn).
22684
22685 If the jump clobbers the conditions then we use states 2 and 4.
22686
22687 A similar thing can be done with conditional return insns.
22688
22689 XXX In case the `target' is an unconditional branch, this conditionalising
22690 of the instructions always reduces code size, but not always execution
22691 time. But then, I want to reduce the code size to somewhere near what
22692 /bin/cc produces. */
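/* As an illustration (not from any particular test case), the fsm turns a
   short forward branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into a conditionally executed instruction:

	cmp	r0, #0
	addne	r1, r1, #1  */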
22693
22694 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22695 instructions. When a COND_EXEC instruction is seen the subsequent
22696 instructions are scanned so that multiple conditional instructions can be
22697 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22698 specify the length and true/false mask for the IT block. These will be
22699 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
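/* On Thumb-2 the equivalent effect is obtained with an IT block
   (illustrative):

	cmp	r0, #0
	it	ne
	addne	r1, r1, #1  */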
22700
22701 /* Returns the index of the ARM condition code string in
22702 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22703 COMPARISON should be an rtx like `(eq (...) (...))'. */
22704
22705 enum arm_cond_code
22706 maybe_get_arm_condition_code (rtx comparison)
22707 {
22708 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22709 enum arm_cond_code code;
22710 enum rtx_code comp_code = GET_CODE (comparison);
22711
22712 if (GET_MODE_CLASS (mode) != MODE_CC)
22713 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22714 XEXP (comparison, 1));
22715
22716 switch (mode)
22717 {
22718 case CC_DNEmode: code = ARM_NE; goto dominance;
22719 case CC_DEQmode: code = ARM_EQ; goto dominance;
22720 case CC_DGEmode: code = ARM_GE; goto dominance;
22721 case CC_DGTmode: code = ARM_GT; goto dominance;
22722 case CC_DLEmode: code = ARM_LE; goto dominance;
22723 case CC_DLTmode: code = ARM_LT; goto dominance;
22724 case CC_DGEUmode: code = ARM_CS; goto dominance;
22725 case CC_DGTUmode: code = ARM_HI; goto dominance;
22726 case CC_DLEUmode: code = ARM_LS; goto dominance;
22727 case CC_DLTUmode: code = ARM_CC;
22728
22729 dominance:
22730 if (comp_code == EQ)
22731 return ARM_INVERSE_CONDITION_CODE (code);
22732 if (comp_code == NE)
22733 return code;
22734 return ARM_NV;
22735
22736 case CC_NOOVmode:
22737 switch (comp_code)
22738 {
22739 case NE: return ARM_NE;
22740 case EQ: return ARM_EQ;
22741 case GE: return ARM_PL;
22742 case LT: return ARM_MI;
22743 default: return ARM_NV;
22744 }
22745
22746 case CC_Zmode:
22747 switch (comp_code)
22748 {
22749 case NE: return ARM_NE;
22750 case EQ: return ARM_EQ;
22751 default: return ARM_NV;
22752 }
22753
22754 case CC_Nmode:
22755 switch (comp_code)
22756 {
22757 case NE: return ARM_MI;
22758 case EQ: return ARM_PL;
22759 default: return ARM_NV;
22760 }
22761
22762 case CCFPEmode:
22763 case CCFPmode:
22764 /* We can handle all cases except UNEQ and LTGT. */
22765 switch (comp_code)
22766 {
22767 case GE: return ARM_GE;
22768 case GT: return ARM_GT;
22769 case LE: return ARM_LS;
22770 case LT: return ARM_MI;
22771 case NE: return ARM_NE;
22772 case EQ: return ARM_EQ;
22773 case ORDERED: return ARM_VC;
22774 case UNORDERED: return ARM_VS;
22775 case UNLT: return ARM_LT;
22776 case UNLE: return ARM_LE;
22777 case UNGT: return ARM_HI;
22778 case UNGE: return ARM_PL;
22779 /* UNEQ and LTGT do not have a representation. */
22780 case UNEQ: /* Fall through. */
22781 case LTGT: /* Fall through. */
22782 default: return ARM_NV;
22783 }
22784
22785 case CC_SWPmode:
22786 switch (comp_code)
22787 {
22788 case NE: return ARM_NE;
22789 case EQ: return ARM_EQ;
22790 case GE: return ARM_LE;
22791 case GT: return ARM_LT;
22792 case LE: return ARM_GE;
22793 case LT: return ARM_GT;
22794 case GEU: return ARM_LS;
22795 case GTU: return ARM_CC;
22796 case LEU: return ARM_CS;
22797 case LTU: return ARM_HI;
22798 default: return ARM_NV;
22799 }
22800
22801 case CC_Cmode:
22802 switch (comp_code)
22803 {
22804 case LTU: return ARM_CS;
22805 case GEU: return ARM_CC;
22806 case NE: return ARM_CS;
22807 case EQ: return ARM_CC;
22808 default: return ARM_NV;
22809 }
22810
22811 case CC_CZmode:
22812 switch (comp_code)
22813 {
22814 case NE: return ARM_NE;
22815 case EQ: return ARM_EQ;
22816 case GEU: return ARM_CS;
22817 case GTU: return ARM_HI;
22818 case LEU: return ARM_LS;
22819 case LTU: return ARM_CC;
22820 default: return ARM_NV;
22821 }
22822
22823 case CC_NCVmode:
22824 switch (comp_code)
22825 {
22826 case GE: return ARM_GE;
22827 case LT: return ARM_LT;
22828 case GEU: return ARM_CS;
22829 case LTU: return ARM_CC;
22830 default: return ARM_NV;
22831 }
22832
22833 case CC_Vmode:
22834 switch (comp_code)
22835 {
22836 case NE: return ARM_VS;
22837 case EQ: return ARM_VC;
22838 default: return ARM_NV;
22839 }
22840
22841 case CCmode:
22842 switch (comp_code)
22843 {
22844 case NE: return ARM_NE;
22845 case EQ: return ARM_EQ;
22846 case GE: return ARM_GE;
22847 case GT: return ARM_GT;
22848 case LE: return ARM_LE;
22849 case LT: return ARM_LT;
22850 case GEU: return ARM_CS;
22851 case GTU: return ARM_HI;
22852 case LEU: return ARM_LS;
22853 case LTU: return ARM_CC;
22854 default: return ARM_NV;
22855 }
22856
22857 default: gcc_unreachable ();
22858 }
22859 }
22860
22861 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22862 static enum arm_cond_code
22863 get_arm_condition_code (rtx comparison)
22864 {
22865 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22866 gcc_assert (code != ARM_NV);
22867 return code;
22868 }
22869
22870 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22871 instructions. */
22872 void
22873 thumb2_final_prescan_insn (rtx_insn *insn)
22874 {
22875 rtx_insn *first_insn = insn;
22876 rtx body = PATTERN (insn);
22877 rtx predicate;
22878 enum arm_cond_code code;
22879 int n;
22880 int mask;
22881 int max;
22882
22883 /* max_insns_skipped in the tune was already taken into account in the
22884 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22885 stage we just emit the IT blocks as they come; it does not make sense
22886 to split the IT blocks.
22887 max = MAX_INSN_PER_IT_BLOCK;
22888
22889 /* Remove the previous insn from the count of insns to be output. */
22890 if (arm_condexec_count)
22891 arm_condexec_count--;
22892
22893 /* Nothing to do if we are already inside a conditional block. */
22894 if (arm_condexec_count)
22895 return;
22896
22897 if (GET_CODE (body) != COND_EXEC)
22898 return;
22899
22900 /* Conditional jumps are implemented directly. */
22901 if (JUMP_P (insn))
22902 return;
22903
22904 predicate = COND_EXEC_TEST (body);
22905 arm_current_cc = get_arm_condition_code (predicate);
22906
22907 n = get_attr_ce_count (insn);
22908 arm_condexec_count = 1;
22909 arm_condexec_mask = (1 << n) - 1;
22910 arm_condexec_masklen = n;
22911 /* See if subsequent instructions can be combined into the same block. */
22912 for (;;)
22913 {
22914 insn = next_nonnote_insn (insn);
22915
22916 /* Jumping into the middle of an IT block is illegal, so a label or
22917 barrier terminates the block. */
22918 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22919 break;
22920
22921 body = PATTERN (insn);
22922 /* USE and CLOBBER aren't really insns, so just skip them. */
22923 if (GET_CODE (body) == USE
22924 || GET_CODE (body) == CLOBBER)
22925 continue;
22926
22927 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22928 if (GET_CODE (body) != COND_EXEC)
22929 break;
22930 /* Maximum number of conditionally executed instructions in a block. */
22931 n = get_attr_ce_count (insn);
22932 if (arm_condexec_masklen + n > max)
22933 break;
22934
22935 predicate = COND_EXEC_TEST (body);
22936 code = get_arm_condition_code (predicate);
22937 mask = (1 << n) - 1;
22938 if (arm_current_cc == code)
22939 arm_condexec_mask |= (mask << arm_condexec_masklen);
22940 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22941 break;
22942
22943 arm_condexec_count++;
22944 arm_condexec_masklen += n;
22945
22946 /* A jump must be the last instruction in a conditional block. */
22947 if (JUMP_P (insn))
22948 break;
22949 }
22950 /* Restore recog_data (getting the attributes of other insns can
22951 destroy this array, but final.c assumes that it remains intact
22952 across this call). */
22953 extract_constrain_insn_cached (first_insn);
22954 }
22955
22956 void
22957 arm_final_prescan_insn (rtx_insn *insn)
22958 {
22959 /* BODY will hold the body of INSN. */
22960 rtx body = PATTERN (insn);
22961
22962 /* This will be 1 if trying to repeat the trick, and things need to be
22963 reversed if it appears to fail. */
22964 int reverse = 0;
22965
22966 /* If we start with a return insn, we only succeed if we find another one. */
22967 int seeking_return = 0;
22968 enum rtx_code return_code = UNKNOWN;
22969
22970 /* START_INSN will hold the insn from where we start looking. This is the
22971 first insn after the following code_label if REVERSE is true. */
22972 rtx_insn *start_insn = insn;
22973
22974 /* If in state 4, check if the target branch is reached, in order to
22975 change back to state 0. */
22976 if (arm_ccfsm_state == 4)
22977 {
22978 if (insn == arm_target_insn)
22979 {
22980 arm_target_insn = NULL;
22981 arm_ccfsm_state = 0;
22982 }
22983 return;
22984 }
22985
22986 /* If in state 3, it is possible to repeat the trick, if this insn is an
22987 unconditional branch to a label, and immediately following this branch
22988 is the previous target label which is only used once, and the label this
22989 branch jumps to is not too far off. */
22990 if (arm_ccfsm_state == 3)
22991 {
22992 if (simplejump_p (insn))
22993 {
22994 start_insn = next_nonnote_insn (start_insn);
22995 if (BARRIER_P (start_insn))
22996 {
22997 /* XXX Isn't this always a barrier? */
22998 start_insn = next_nonnote_insn (start_insn);
22999 }
23000 if (LABEL_P (start_insn)
23001 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23002 && LABEL_NUSES (start_insn) == 1)
23003 reverse = TRUE;
23004 else
23005 return;
23006 }
23007 else if (ANY_RETURN_P (body))
23008 {
23009 start_insn = next_nonnote_insn (start_insn);
23010 if (BARRIER_P (start_insn))
23011 start_insn = next_nonnote_insn (start_insn);
23012 if (LABEL_P (start_insn)
23013 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23014 && LABEL_NUSES (start_insn) == 1)
23015 {
23016 reverse = TRUE;
23017 seeking_return = 1;
23018 return_code = GET_CODE (body);
23019 }
23020 else
23021 return;
23022 }
23023 else
23024 return;
23025 }
23026
23027 gcc_assert (!arm_ccfsm_state || reverse);
23028 if (!JUMP_P (insn))
23029 return;
23030
23031 /* This jump might be paralleled with a clobber of the condition codes;
23032 the jump should always come first. */
23033 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23034 body = XVECEXP (body, 0, 0);
23035
23036 if (reverse
23037 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23038 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23039 {
23040 int insns_skipped;
23041 int fail = FALSE, succeed = FALSE;
23042 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23043 int then_not_else = TRUE;
23044 rtx_insn *this_insn = start_insn;
23045 rtx label = 0;
23046
23047 /* Register the insn jumped to. */
23048 if (reverse)
23049 {
23050 if (!seeking_return)
23051 label = XEXP (SET_SRC (body), 0);
23052 }
23053 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23054 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23055 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23056 {
23057 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23058 then_not_else = FALSE;
23059 }
23060 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23061 {
23062 seeking_return = 1;
23063 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23064 }
23065 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23066 {
23067 seeking_return = 1;
23068 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23069 then_not_else = FALSE;
23070 }
23071 else
23072 gcc_unreachable ();
23073
23074 /* See how many insns this branch skips, and what kind of insns. If all
23075 insns are okay, and the label or unconditional branch to the same
23076 label is not too far away, succeed. */
23077 for (insns_skipped = 0;
23078 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23079 {
23080 rtx scanbody;
23081
23082 this_insn = next_nonnote_insn (this_insn);
23083 if (!this_insn)
23084 break;
23085
23086 switch (GET_CODE (this_insn))
23087 {
23088 case CODE_LABEL:
23089 /* Succeed if it is the target label, otherwise fail since
23090 control falls in from somewhere else. */
23091 if (this_insn == label)
23092 {
23093 arm_ccfsm_state = 1;
23094 succeed = TRUE;
23095 }
23096 else
23097 fail = TRUE;
23098 break;
23099
23100 case BARRIER:
23101 /* Succeed if the following insn is the target label.
23102 Otherwise fail.
23103 If return insns are used then the last insn in a function
23104 will be a barrier. */
23105 this_insn = next_nonnote_insn (this_insn);
23106 if (this_insn && this_insn == label)
23107 {
23108 arm_ccfsm_state = 1;
23109 succeed = TRUE;
23110 }
23111 else
23112 fail = TRUE;
23113 break;
23114
23115 case CALL_INSN:
23116 /* The AAPCS says that conditional calls should not be
23117 used since they make interworking inefficient (the
23118 linker can't transform BL<cond> into BLX). That's
23119 only a problem if the machine has BLX. */
23120 if (arm_arch5)
23121 {
23122 fail = TRUE;
23123 break;
23124 }
23125
23126 /* Succeed if the following insn is the target label, or
23127 if the following two insns are a barrier and the
23128 target label. */
23129 this_insn = next_nonnote_insn (this_insn);
23130 if (this_insn && BARRIER_P (this_insn))
23131 this_insn = next_nonnote_insn (this_insn);
23132
23133 if (this_insn && this_insn == label
23134 && insns_skipped < max_insns_skipped)
23135 {
23136 arm_ccfsm_state = 1;
23137 succeed = TRUE;
23138 }
23139 else
23140 fail = TRUE;
23141 break;
23142
23143 case JUMP_INSN:
23144 /* If this is an unconditional branch to the same label, succeed.
23145 If it is to another label, do nothing. If it is conditional,
23146 fail. */
23147 /* XXX Probably, the tests for SET and the PC are
23148 unnecessary. */
23149
23150 scanbody = PATTERN (this_insn);
23151 if (GET_CODE (scanbody) == SET
23152 && GET_CODE (SET_DEST (scanbody)) == PC)
23153 {
23154 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23155 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23156 {
23157 arm_ccfsm_state = 2;
23158 succeed = TRUE;
23159 }
23160 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23161 fail = TRUE;
23162 }
23163 /* Fail if a conditional return is undesirable (e.g. on a
23164 StrongARM), but still allow this if optimizing for size. */
23165 else if (GET_CODE (scanbody) == return_code
23166 && !use_return_insn (TRUE, NULL)
23167 && !optimize_size)
23168 fail = TRUE;
23169 else if (GET_CODE (scanbody) == return_code)
23170 {
23171 arm_ccfsm_state = 2;
23172 succeed = TRUE;
23173 }
23174 else if (GET_CODE (scanbody) == PARALLEL)
23175 {
23176 switch (get_attr_conds (this_insn))
23177 {
23178 case CONDS_NOCOND:
23179 break;
23180 default:
23181 fail = TRUE;
23182 break;
23183 }
23184 }
23185 else
23186 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23187
23188 break;
23189
23190 case INSN:
23191 /* Instructions using or affecting the condition codes make it
23192 fail. */
23193 scanbody = PATTERN (this_insn);
23194 if (!(GET_CODE (scanbody) == SET
23195 || GET_CODE (scanbody) == PARALLEL)
23196 || get_attr_conds (this_insn) != CONDS_NOCOND)
23197 fail = TRUE;
23198 break;
23199
23200 default:
23201 break;
23202 }
23203 }
23204 if (succeed)
23205 {
23206 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23207 arm_target_label = CODE_LABEL_NUMBER (label);
23208 else
23209 {
23210 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23211
23212 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23213 {
23214 this_insn = next_nonnote_insn (this_insn);
23215 gcc_assert (!this_insn
23216 || (!BARRIER_P (this_insn)
23217 && !LABEL_P (this_insn)));
23218 }
23219 if (!this_insn)
23220 {
23221 /* Oh dear! We ran off the end; give up. */
23222 extract_constrain_insn_cached (insn);
23223 arm_ccfsm_state = 0;
23224 arm_target_insn = NULL;
23225 return;
23226 }
23227 arm_target_insn = this_insn;
23228 }
23229
23230 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23231 what it was. */
23232 if (!reverse)
23233 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23234
23235 if (reverse || then_not_else)
23236 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23237 }
23238
23239 /* Restore recog_data (getting the attributes of other insns can
23240 destroy this array, but final.c assumes that it remains intact
23241 across this call). */
23242 extract_constrain_insn_cached (insn);
23243 }
23244 }
23245
23246 /* Output IT instructions. */
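/* For example: with arm_condexec_masklen == 3 and arm_condexec_mask == 0x3
   (the third insn uses the inverse condition), the buffer becomes "tte" and
   we emit "itte\teq" ahead of the first conditional insn when
   arm_current_cc is ARM_EQ.  */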
23247 void
23248 thumb2_asm_output_opcode (FILE * stream)
23249 {
23250 char buff[5];
23251 int n;
23252
23253 if (arm_condexec_mask)
23254 {
23255 for (n = 0; n < arm_condexec_masklen; n++)
23256 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23257 buff[n] = 0;
23258 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23259 arm_condition_codes[arm_current_cc]);
23260 arm_condexec_mask = 0;
23261 }
23262 }
23263
23264 /* Returns true if REGNO is a valid register
23265 for holding a quantity of type MODE. */
23266 int
23267 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23268 {
23269 if (GET_MODE_CLASS (mode) == MODE_CC)
23270 return (regno == CC_REGNUM
23271 || (TARGET_HARD_FLOAT
23272 && regno == VFPCC_REGNUM));
23273
23274 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23275 return false;
23276
23277 if (TARGET_THUMB1)
23278 /* For the Thumb we only allow values bigger than SImode in
23279 registers 0 - 6, so that there is always a second low
23280 register available to hold the upper part of the value.
23281 We probably ought to ensure that the register is the
23282 start of an even-numbered register pair. */
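    /* For instance (illustrative), a DImode value may start in r6
       (occupying r6 and r7, both low registers) but not in r7, since its
       upper half would then need the high register r8.  */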
23283 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23284
23285 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23286 {
23287 if (mode == SFmode || mode == SImode)
23288 return VFP_REGNO_OK_FOR_SINGLE (regno);
23289
23290 if (mode == DFmode)
23291 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23292
23293 if (mode == HFmode)
23294 return VFP_REGNO_OK_FOR_SINGLE (regno);
23295
23296 /* VFP registers can hold HImode values. */
23297 if (mode == HImode)
23298 return VFP_REGNO_OK_FOR_SINGLE (regno);
23299
23300 if (TARGET_NEON)
23301 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23302 || (VALID_NEON_QREG_MODE (mode)
23303 && NEON_REGNO_OK_FOR_QUAD (regno))
23304 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23305 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23306 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23307 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23308 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23309
23310 return FALSE;
23311 }
23312
23313 if (TARGET_REALLY_IWMMXT)
23314 {
23315 if (IS_IWMMXT_GR_REGNUM (regno))
23316 return mode == SImode;
23317
23318 if (IS_IWMMXT_REGNUM (regno))
23319 return VALID_IWMMXT_REG_MODE (mode);
23320 }
23321
23322 /* We allow almost any value to be stored in the general registers.
23323 Restrict doubleword quantities to even register pairs in ARM state
23324 so that we can use ldrd. Do not allow very large Neon structure
23325 opaque modes in general registers; they would use too many. */
23326 if (regno <= LAST_ARM_REGNUM)
23327 {
23328 if (ARM_NUM_REGS (mode) > 4)
23329 return FALSE;
23330
23331 if (TARGET_THUMB2)
23332 return TRUE;
23333
23334 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23335 }
23336
23337 if (regno == FRAME_POINTER_REGNUM
23338 || regno == ARG_POINTER_REGNUM)
23339 /* We only allow integers in the fake hard registers. */
23340 return GET_MODE_CLASS (mode) == MODE_INT;
23341
23342 return FALSE;
23343 }
23344
23345 /* Implement MODES_TIEABLE_P. */
23346
23347 bool
23348 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23349 {
23350 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23351 return true;
23352
23353 /* We specifically want to allow elements of "structure" modes to
23354 be tieable to the structure. This more general condition allows
23355 other rarer situations too. */
23356 if (TARGET_NEON
23357 && (VALID_NEON_DREG_MODE (mode1)
23358 || VALID_NEON_QREG_MODE (mode1)
23359 || VALID_NEON_STRUCT_MODE (mode1))
23360 && (VALID_NEON_DREG_MODE (mode2)
23361 || VALID_NEON_QREG_MODE (mode2)
23362 || VALID_NEON_STRUCT_MODE (mode2)))
23363 return true;
23364
23365 return false;
23366 }
23367
23368 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23369 not used in arm mode. */
23370
23371 enum reg_class
23372 arm_regno_class (int regno)
23373 {
23374 if (regno == PC_REGNUM)
23375 return NO_REGS;
23376
23377 if (TARGET_THUMB1)
23378 {
23379 if (regno == STACK_POINTER_REGNUM)
23380 return STACK_REG;
23381 if (regno == CC_REGNUM)
23382 return CC_REG;
23383 if (regno < 8)
23384 return LO_REGS;
23385 return HI_REGS;
23386 }
23387
23388 if (TARGET_THUMB2 && regno < 8)
23389 return LO_REGS;
23390
23391 if ( regno <= LAST_ARM_REGNUM
23392 || regno == FRAME_POINTER_REGNUM
23393 || regno == ARG_POINTER_REGNUM)
23394 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23395
23396 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23397 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23398
23399 if (IS_VFP_REGNUM (regno))
23400 {
23401 if (regno <= D7_VFP_REGNUM)
23402 return VFP_D0_D7_REGS;
23403 else if (regno <= LAST_LO_VFP_REGNUM)
23404 return VFP_LO_REGS;
23405 else
23406 return VFP_HI_REGS;
23407 }
23408
23409 if (IS_IWMMXT_REGNUM (regno))
23410 return IWMMXT_REGS;
23411
23412 if (IS_IWMMXT_GR_REGNUM (regno))
23413 return IWMMXT_GR_REGS;
23414
23415 return NO_REGS;
23416 }
23417
23418 /* Handle a special case when computing the offset
23419 of an argument from the frame pointer. */
23420 int
23421 arm_debugger_arg_offset (int value, rtx addr)
23422 {
23423 rtx_insn *insn;
23424
23425 /* We are only interested if dbxout_parms() failed to compute the offset. */
23426 if (value != 0)
23427 return 0;
23428
23429 /* We can only cope with the case where the address is held in a register. */
23430 if (!REG_P (addr))
23431 return 0;
23432
23433 /* If we are using the frame pointer to point at the argument, then
23434 an offset of 0 is correct. */
23435 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23436 return 0;
23437
23438 /* If we are using the stack pointer to point at the
23439 argument, then an offset of 0 is correct. */
23440 /* ??? Check this is consistent with thumb2 frame layout. */
23441 if ((TARGET_THUMB || !frame_pointer_needed)
23442 && REGNO (addr) == SP_REGNUM)
23443 return 0;
23444
23445 /* Oh dear. The argument is pointed to by a register rather
23446 than being held in a register, or being stored at a known
23447 offset from the frame pointer. Since GDB only understands
23448 those two kinds of argument we must translate the address
23449 held in the register into an offset from the frame pointer.
23450 We do this by searching through the insns for the function
23451 looking to see where this register gets its value. If the
23452 register is initialized from the frame pointer plus an offset
23453 then we are in luck and we can continue, otherwise we give up.
23454
23455 This code is exercised by producing debugging information
23456 for a function with arguments like this:
23457
23458 double func (double a, double b, int c, double d) {return d;}
23459
23460 Without this code the stab for parameter 'd' will be set to
23461 an offset of 0 from the frame pointer, rather than 8. */
23462
23463 /* The if() statement says:
23464
23465 If the insn is a normal instruction
23466 and if the insn is setting the value in a register
23467 and if the register being set is the register holding the address of the argument
23468 and if the address is computed by an addition
23469 that involves adding to a register
23470 which is the frame pointer
23471 a constant integer
23472
23473 then... */
23474
23475 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23476 {
23477 if ( NONJUMP_INSN_P (insn)
23478 && GET_CODE (PATTERN (insn)) == SET
23479 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23480 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23481 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23482 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23483 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23484 )
23485 {
23486 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23487
23488 break;
23489 }
23490 }
23491
23492 if (value == 0)
23493 {
23494 debug_rtx (addr);
23495 warning (0, "unable to compute real location of stacked parameter");
23496 value = 8; /* XXX magic hack */
23497 }
23498
23499 return value;
23500 }
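/* Editorial illustration (a sketch, not taken from the surrounding code):
   the loop above matches insns whose PATTERN has the following shape, shown
   here in RTL notation for a hypothetical register rN that holds the
   argument's address:

       (set (reg:SI rN)
            (plus:SI (reg:SI hard-frame-pointer)
                     (const_int 8)))

   in which case the const_int (8 in this sketch) becomes the offset that is
   reported to the debugger.  */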
23501 \f
23502 /* Implement TARGET_PROMOTED_TYPE. */
23503
23504 static tree
23505 arm_promoted_type (const_tree t)
23506 {
23507 if (SCALAR_FLOAT_TYPE_P (t)
23508 && TYPE_PRECISION (t) == 16
23509 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23510 return float_type_node;
23511 return NULL_TREE;
23512 }
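/* For illustration only (an editorial sketch of the source-level effect,
   not code from this file): with the ARM __fp16 type, arithmetic is
   performed after promotion to float, so in

       __fp16 a, b;
       float sum (void) { return a + b; }

   the addition is carried out in SFmode and only the operands are loaded
   as 16-bit values.  */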
23513
23514 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23515 This simply adds HFmode as a supported mode; even though we don't
23516 implement arithmetic on this type directly, it's supported by
23517 optabs conversions, much the way the double-word arithmetic is
23518 special-cased in the default hook. */
23519
23520 static bool
23521 arm_scalar_mode_supported_p (machine_mode mode)
23522 {
23523 if (mode == HFmode)
23524 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23525 else if (ALL_FIXED_POINT_MODE_P (mode))
23526 return true;
23527 else
23528 return default_scalar_mode_supported_p (mode);
23529 }
23530
23531 /* Set the value of FLT_EVAL_METHOD.
23532 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23533
23534 0: evaluate all operations and constants, whose semantic type has at
23535 most the range and precision of type float, to the range and
23536 precision of float; evaluate all other operations and constants to
23537 the range and precision of the semantic type;
23538
23539 N, where _FloatN is a supported interchange floating type:
23540 evaluate all operations and constants, whose semantic type has at
23541 most the range and precision of _FloatN type, to the range and
23542 precision of the _FloatN type; evaluate all other operations and
23543 constants to the range and precision of the semantic type;
23544
23545 If we have the ARMv8.2-A extensions then we support _Float16 in native
23546 precision, so we should set this to 16. Otherwise, we support the type,
23547 but want to evaluate expressions in float precision, so set this to
23548 0. */
23549
23550 static enum flt_eval_method
23551 arm_excess_precision (enum excess_precision_type type)
23552 {
23553 switch (type)
23554 {
23555 case EXCESS_PRECISION_TYPE_FAST:
23556 case EXCESS_PRECISION_TYPE_STANDARD:
23557 /* We can calculate either in 16-bit range and precision or
23558 32-bit range and precision. Make that decision based on whether
23559 we have native support for the ARMv8.2-A 16-bit floating-point
23560 instructions or not. */
23561 return (TARGET_VFP_FP16INST
23562 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23563 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23564 case EXCESS_PRECISION_TYPE_IMPLICIT:
23565 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23566 default:
23567 gcc_unreachable ();
23568 }
23569 return FLT_EVAL_METHOD_UNPREDICTABLE;
23570 }
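/* A worked illustration of the effect described above (editorial, using a
   hypothetical _Float16 computation):

       _Float16 a, b, c;
       ... a * b + c ...

   With TARGET_VFP_FP16INST (the ARMv8.2-A FP16 instructions) the expression
   may be evaluated entirely in HFmode (FLT_EVAL_METHOD == 16); otherwise
   each operation is evaluated in SFmode and only the final result is
   narrowed back to HFmode (FLT_EVAL_METHOD == 0).  */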
23571
23572
23573 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23574 _Float16 if we are using anything other than ieee format for 16-bit
23575 floating point. Otherwise, punt to the default implementation. */
23576 static machine_mode
23577 arm_floatn_mode (int n, bool extended)
23578 {
23579 if (!extended && n == 16)
23580 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23581
23582 return default_floatn_mode (n, extended);
23583 }
23584
23585
23586 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23587 not to early-clobber SRC registers in the process.
23588
23589 We assume that the operands described by SRC and DEST represent a
23590 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23591 number of components into which the copy has been decomposed. */
23592 void
23593 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23594 {
23595 unsigned int i;
23596
23597 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23598 || REGNO (operands[0]) < REGNO (operands[1]))
23599 {
23600 for (i = 0; i < count; i++)
23601 {
23602 operands[2 * i] = dest[i];
23603 operands[2 * i + 1] = src[i];
23604 }
23605 }
23606 else
23607 {
23608 for (i = 0; i < count; i++)
23609 {
23610 operands[2 * i] = dest[count - i - 1];
23611 operands[2 * i + 1] = src[count - i - 1];
23612 }
23613 }
23614 }
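/* Illustrative example (editorial, not from the original sources): suppose
   a Q-register copy has been decomposed into two D-register copies,
   COUNT == 2, with dest = { d2, d3 } and src = { d1, d2 }.  Because the
   destination overlaps the source and REGNO (operands[0]) is greater than
   REGNO (operands[1]), the loop above emits the components in reverse order:

       operands[0] = d3, operands[1] = d2   (copy d2 -> d3 first)
       operands[2] = d2, operands[3] = d1   (then copy d1 -> d2)

   so that d2 is read before it is overwritten.  */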
23615
23616 /* Split operands into moves from op[1] + op[2] into op[0]. */
23617
23618 void
23619 neon_split_vcombine (rtx operands[3])
23620 {
23621 unsigned int dest = REGNO (operands[0]);
23622 unsigned int src1 = REGNO (operands[1]);
23623 unsigned int src2 = REGNO (operands[2]);
23624 machine_mode halfmode = GET_MODE (operands[1]);
23625 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23626 rtx destlo, desthi;
23627
23628 if (src1 == dest && src2 == dest + halfregs)
23629 {
23630 /* No-op move. Can't split to nothing; emit something. */
23631 emit_note (NOTE_INSN_DELETED);
23632 return;
23633 }
23634
23635 /* Preserve register attributes for variable tracking. */
23636 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23637 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23638 GET_MODE_SIZE (halfmode));
23639
23640 /* Special case of reversed high/low parts. Use VSWP. */
23641 if (src2 == dest && src1 == dest + halfregs)
23642 {
23643 rtx x = gen_rtx_SET (destlo, operands[1]);
23644 rtx y = gen_rtx_SET (desthi, operands[2]);
23645 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23646 return;
23647 }
23648
23649 if (!reg_overlap_mentioned_p (operands[2], destlo))
23650 {
23651 /* Try to avoid unnecessary moves if part of the result
23652 is in the right place already. */
23653 if (src1 != dest)
23654 emit_move_insn (destlo, operands[1]);
23655 if (src2 != dest + halfregs)
23656 emit_move_insn (desthi, operands[2]);
23657 }
23658 else
23659 {
23660 if (src2 != dest + halfregs)
23661 emit_move_insn (desthi, operands[2]);
23662 if (src1 != dest)
23663 emit_move_insn (destlo, operands[1]);
23664 }
23665 }
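/* Illustrative cases for the splitter above (editorial sketch, using
   hypothetical register assignments):

     vcombine d0, d1 -> q0  with src1 == d0 and src2 == d1:
       the halves are already in place, so only a deleted-insn note is
       emitted;

     vcombine d1, d0 -> q0  (halves reversed):
       a single PARALLEL of two SETs is emitted, which the backend can
       output as a VSWP;

     otherwise the two halves are moved independently, ordered so that a
     source half is never clobbered before it is read.  */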
23666 \f
23667 /* Return the number (counting from 0) of
23668 the least significant set bit in MASK. */
23669
23670 inline static int
23671 number_of_first_bit_set (unsigned mask)
23672 {
23673 return ctz_hwi (mask);
23674 }
23675
23676 /* Like emit_multi_reg_push, but allowing for a different set of
23677 registers to be described as saved. MASK is the set of registers
23678 to be saved; REAL_REGS is the set of registers to be described as
23679 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23680
23681 static rtx_insn *
23682 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23683 {
23684 unsigned long regno;
23685 rtx par[10], tmp, reg;
23686 rtx_insn *insn;
23687 int i, j;
23688
23689 /* Build the parallel of the registers actually being stored. */
23690 for (i = 0; mask; ++i, mask &= mask - 1)
23691 {
23692 regno = ctz_hwi (mask);
23693 reg = gen_rtx_REG (SImode, regno);
23694
23695 if (i == 0)
23696 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23697 else
23698 tmp = gen_rtx_USE (VOIDmode, reg);
23699
23700 par[i] = tmp;
23701 }
23702
23703 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23704 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23705 tmp = gen_frame_mem (BLKmode, tmp);
23706 tmp = gen_rtx_SET (tmp, par[0]);
23707 par[0] = tmp;
23708
23709 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23710 insn = emit_insn (tmp);
23711
23712 /* Always build the stack adjustment note for unwind info. */
23713 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23714 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23715 par[0] = tmp;
23716
23717 /* Build the parallel of the registers recorded as saved for unwind. */
23718 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23719 {
23720 regno = ctz_hwi (real_regs);
23721 reg = gen_rtx_REG (SImode, regno);
23722
23723 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23724 tmp = gen_frame_mem (SImode, tmp);
23725 tmp = gen_rtx_SET (tmp, reg);
23726 RTX_FRAME_RELATED_P (tmp) = 1;
23727 par[j + 1] = tmp;
23728 }
23729
23730 if (j == 0)
23731 tmp = par[0];
23732 else
23733 {
23734 RTX_FRAME_RELATED_P (par[0]) = 1;
23735 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23736 }
23737
23738 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23739
23740 return insn;
23741 }
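/* Usage sketch (editorial, mirroring how the prologue code later in this
   file calls this function): when high registers have first been copied
   into free low registers, the emitted insn pushes the low registers while
   the unwind information describes the original high registers, e.g.

       mov  r4, r8
       mov  r5, r9
       push {r4, r5}        <- MASK      == { r4, r5 }
                               REAL_REGS == { r8, r9 }

   so the CFI records r8 and r9 as saved in those stack slots.  */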
23742
23743 /* Emit code to pop registers from the stack. F is the
23744 assembly file. MASK is the registers to pop. */
23745 static void
23746 thumb_pop (FILE *f, unsigned long mask)
23747 {
23748 int regno;
23749 int lo_mask = mask & 0xFF;
23750 int pushed_words = 0;
23751
23752 gcc_assert (mask);
23753
23754 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23755 {
23756 /* Special case: do not generate a POP PC statement here; do it in
23757 thumb_exit (). */
23758 thumb_exit (f, -1);
23759 return;
23760 }
23761
23762 fprintf (f, "\tpop\t{");
23763
23764 /* Look at the low registers first. */
23765 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23766 {
23767 if (lo_mask & 1)
23768 {
23769 asm_fprintf (f, "%r", regno);
23770
23771 if ((lo_mask & ~1) != 0)
23772 fprintf (f, ", ");
23773
23774 pushed_words++;
23775 }
23776 }
23777
23778 if (mask & (1 << PC_REGNUM))
23779 {
23780 /* Catch popping the PC. */
23781 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23782 || IS_CMSE_ENTRY (arm_current_func_type ()))
23783 {
23784 /* The PC is never popped directly; instead
23785 it is popped into r3 and then BX is used. */
23786 fprintf (f, "}\n");
23787
23788 thumb_exit (f, -1);
23789
23790 return;
23791 }
23792 else
23793 {
23794 if (mask & 0xFF)
23795 fprintf (f, ", ");
23796
23797 asm_fprintf (f, "%r", PC_REGNUM);
23798 }
23799 }
23800
23801 fprintf (f, "}\n");
23802 }
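/* Example of the assembly produced here (editorial illustration): for
   MASK == { r4, r5, r7, PC } on a target where popping the PC directly is
   allowed, the output is simply

       pop  {r4, r5, r7, pc}

   whereas with interworking, backtracing, EH return or a CMSE entry
   function the PC is excluded and control is handed to thumb_exit.  */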
23803
23804 /* Generate code to return from a thumb function.
23805 If 'reg_containing_return_addr' is -1, then the return address is
23806 actually on the stack, at the stack pointer. */
23807 static void
23808 thumb_exit (FILE *f, int reg_containing_return_addr)
23809 {
23810 unsigned regs_available_for_popping;
23811 unsigned regs_to_pop;
23812 int pops_needed;
23813 unsigned available;
23814 unsigned required;
23815 machine_mode mode;
23816 int size;
23817 int restore_a4 = FALSE;
23818
23819 /* Compute the registers we need to pop. */
23820 regs_to_pop = 0;
23821 pops_needed = 0;
23822
23823 if (reg_containing_return_addr == -1)
23824 {
23825 regs_to_pop |= 1 << LR_REGNUM;
23826 ++pops_needed;
23827 }
23828
23829 if (TARGET_BACKTRACE)
23830 {
23831 /* Restore the (ARM) frame pointer and stack pointer. */
23832 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23833 pops_needed += 2;
23834 }
23835
23836 /* If there is nothing to pop then just emit the BX instruction and
23837 return. */
23838 if (pops_needed == 0)
23839 {
23840 if (crtl->calls_eh_return)
23841 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23842
23843 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23844 {
23845 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23846 reg_containing_return_addr);
23847 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23848 }
23849 else
23850 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23851 return;
23852 }
23853 /* Otherwise if we are not supporting interworking and we have not created
23854 a backtrace structure and the function was not entered in ARM mode then
23855 just pop the return address straight into the PC. */
23856 else if (!TARGET_INTERWORK
23857 && !TARGET_BACKTRACE
23858 && !is_called_in_ARM_mode (current_function_decl)
23859 && !crtl->calls_eh_return
23860 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23861 {
23862 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23863 return;
23864 }
23865
23866 /* Find out how many of the (return) argument registers we can corrupt. */
23867 regs_available_for_popping = 0;
23868
23869 /* If returning via __builtin_eh_return, the bottom three registers
23870 all contain information needed for the return. */
23871 if (crtl->calls_eh_return)
23872 size = 12;
23873 else
23874 {
23875 /* We can deduce the registers used from the function's
23876 return value. This is more reliable than examining
23877 df_regs_ever_live_p () because that will be set if the register is
23878 ever used in the function, not just if the register is used
23879 to hold a return value. */
23880
23881 if (crtl->return_rtx != 0)
23882 mode = GET_MODE (crtl->return_rtx);
23883 else
23884 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23885
23886 size = GET_MODE_SIZE (mode);
23887
23888 if (size == 0)
23889 {
23890 /* In a void function we can use any argument register.
23891 In a function that returns a structure on the stack
23892 we can use the second and third argument registers. */
23893 if (mode == VOIDmode)
23894 regs_available_for_popping =
23895 (1 << ARG_REGISTER (1))
23896 | (1 << ARG_REGISTER (2))
23897 | (1 << ARG_REGISTER (3));
23898 else
23899 regs_available_for_popping =
23900 (1 << ARG_REGISTER (2))
23901 | (1 << ARG_REGISTER (3));
23902 }
23903 else if (size <= 4)
23904 regs_available_for_popping =
23905 (1 << ARG_REGISTER (2))
23906 | (1 << ARG_REGISTER (3));
23907 else if (size <= 8)
23908 regs_available_for_popping =
23909 (1 << ARG_REGISTER (3));
23910 }
23911
23912 /* Match registers to be popped with registers into which we pop them. */
23913 for (available = regs_available_for_popping,
23914 required = regs_to_pop;
23915 required != 0 && available != 0;
23916 available &= ~(available & - available),
23917 required &= ~(required & - required))
23918 -- pops_needed;
23919
23920 /* If we have any popping registers left over, remove them. */
23921 if (available > 0)
23922 regs_available_for_popping &= ~available;
23923
23924 /* Otherwise if we need another popping register we can use
23925 the fourth argument register. */
23926 else if (pops_needed)
23927 {
23928 /* If we have not found any free argument registers and
23929 reg a4 contains the return address, we must move it. */
23930 if (regs_available_for_popping == 0
23931 && reg_containing_return_addr == LAST_ARG_REGNUM)
23932 {
23933 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23934 reg_containing_return_addr = LR_REGNUM;
23935 }
23936 else if (size > 12)
23937 {
23938 /* Register a4 is being used to hold part of the return value,
23939 but we have dire need of a free, low register. */
23940 restore_a4 = TRUE;
23941
23942 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23943 }
23944
23945 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23946 {
23947 /* The fourth argument register is available. */
23948 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23949
23950 --pops_needed;
23951 }
23952 }
23953
23954 /* Pop as many registers as we can. */
23955 thumb_pop (f, regs_available_for_popping);
23956
23957 /* Process the registers we popped. */
23958 if (reg_containing_return_addr == -1)
23959 {
23960 /* The return address was popped into the lowest numbered register. */
23961 regs_to_pop &= ~(1 << LR_REGNUM);
23962
23963 reg_containing_return_addr =
23964 number_of_first_bit_set (regs_available_for_popping);
23965
23966 /* Remove this register from the mask of available registers, so that
23967 the return address will not be corrupted by further pops. */
23968 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23969 }
23970
23971 /* If we popped other registers then handle them here. */
23972 if (regs_available_for_popping)
23973 {
23974 int frame_pointer;
23975
23976 /* Work out which register currently contains the frame pointer. */
23977 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23978
23979 /* Move it into the correct place. */
23980 asm_fprintf (f, "\tmov\t%r, %r\n",
23981 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23982
23983 /* (Temporarily) remove it from the mask of popped registers. */
23984 regs_available_for_popping &= ~(1 << frame_pointer);
23985 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23986
23987 if (regs_available_for_popping)
23988 {
23989 int stack_pointer;
23990
23991 /* We popped the stack pointer as well,
23992 find the register that contains it. */
23993 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23994
23995 /* Move it into the stack register. */
23996 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23997
23998 /* At this point we have popped all necessary registers, so
23999 do not worry about restoring regs_available_for_popping
24000 to its correct value:
24001
24002 assert (pops_needed == 0)
24003 assert (regs_available_for_popping == (1 << frame_pointer))
24004 assert (regs_to_pop == (1 << STACK_POINTER)) */
24005 }
24006 else
24007 {
24008 /* Since we have just moved the popped value into the frame
24009 pointer, the popping register is available for reuse, and
24010 we know that we still have the stack pointer left to pop. */
24011 regs_available_for_popping |= (1 << frame_pointer);
24012 }
24013 }
24014
24015 /* If we still have registers left on the stack, but we no longer have
24016 any registers into which we can pop them, then we must move the return
24017 address into the link register and make available the register that
24018 contained it. */
24019 if (regs_available_for_popping == 0 && pops_needed > 0)
24020 {
24021 regs_available_for_popping |= 1 << reg_containing_return_addr;
24022
24023 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24024 reg_containing_return_addr);
24025
24026 reg_containing_return_addr = LR_REGNUM;
24027 }
24028
24029 /* If we have registers left on the stack then pop some more.
24030 We know that at most we will want to pop FP and SP. */
24031 if (pops_needed > 0)
24032 {
24033 int popped_into;
24034 int move_to;
24035
24036 thumb_pop (f, regs_available_for_popping);
24037
24038 /* We have popped either FP or SP.
24039 Move whichever one it is into the correct register. */
24040 popped_into = number_of_first_bit_set (regs_available_for_popping);
24041 move_to = number_of_first_bit_set (regs_to_pop);
24042
24043 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24044
24045 regs_to_pop &= ~(1 << move_to);
24046
24047 --pops_needed;
24048 }
24049
24050 /* If we still have not popped everything then we must have only
24051 had one register available to us and we are now popping the SP. */
24052 if (pops_needed > 0)
24053 {
24054 int popped_into;
24055
24056 thumb_pop (f, regs_available_for_popping);
24057
24058 popped_into = number_of_first_bit_set (regs_available_for_popping);
24059
24060 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24061 /*
24062 assert (regs_to_pop == (1 << STACK_POINTER))
24063 assert (pops_needed == 1)
24064 */
24065 }
24066
24067 /* If necessary restore the a4 register. */
24068 if (restore_a4)
24069 {
24070 if (reg_containing_return_addr != LR_REGNUM)
24071 {
24072 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24073 reg_containing_return_addr = LR_REGNUM;
24074 }
24075
24076 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24077 }
24078
24079 if (crtl->calls_eh_return)
24080 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24081
24082 /* Return to caller. */
24083 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24084 {
24085 /* This is for the cases where LR is not being used to contain the return
24086 address. It may therefore contain information that we might not want
24087 to leak, hence it must be cleared. The value in R0 will never be a
24088 secret at this point, so it is safe to use it, see the clearing code
24089 in 'cmse_nonsecure_entry_clear_before_return'. */
24090 if (reg_containing_return_addr != LR_REGNUM)
24091 asm_fprintf (f, "\tmov\tlr, r0\n");
24092
24093 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24094 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24095 }
24096 else
24097 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24098 }
24099 \f
24100 /* Scan INSN just before assembler is output for it.
24101 For Thumb-1, we track the status of the condition codes; this
24102 information is used in the cbranchsi4_insn pattern. */
24103 void
24104 thumb1_final_prescan_insn (rtx_insn *insn)
24105 {
24106 if (flag_print_asm_name)
24107 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24108 INSN_ADDRESSES (INSN_UID (insn)));
24109 /* Don't overwrite the previous setter when we get to a cbranch. */
24110 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24111 {
24112 enum attr_conds conds;
24113
24114 if (cfun->machine->thumb1_cc_insn)
24115 {
24116 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24117 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24118 CC_STATUS_INIT;
24119 }
24120 conds = get_attr_conds (insn);
24121 if (conds == CONDS_SET)
24122 {
24123 rtx set = single_set (insn);
24124 cfun->machine->thumb1_cc_insn = insn;
24125 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24126 cfun->machine->thumb1_cc_op1 = const0_rtx;
24127 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24128 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24129 {
24130 rtx src1 = XEXP (SET_SRC (set), 1);
24131 if (src1 == const0_rtx)
24132 cfun->machine->thumb1_cc_mode = CCmode;
24133 }
24134 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24135 {
24136 /* Record the src register operand instead of dest because
24137 cprop_hardreg pass propagates src. */
24138 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24139 }
24140 }
24141 else if (conds != CONDS_NOCOND)
24142 cfun->machine->thumb1_cc_insn = NULL_RTX;
24143 }
24144
24145 /* Check if unexpected far jump is used. */
24146 if (cfun->machine->lr_save_eliminated
24147 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24148 internal_error("Unexpected thumb1 far jump");
24149 }
24150
24151 int
24152 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24153 {
24154 unsigned HOST_WIDE_INT mask = 0xff;
24155 int i;
24156
24157 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24158 if (val == 0) /* XXX */
24159 return 0;
24160
24161 for (i = 0; i < 25; i++)
24162 if ((val & (mask << i)) == val)
24163 return 1;
24164
24165 return 0;
24166 }
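/* Worked examples for the predicate above (editorial):
     0x000001fe == 0xff << 1  -> shiftable, returns 1;
     0x0001fe00 == 0xff << 9  -> shiftable, returns 1;
     0x00000101 spans bits 0 and 8, which no single 8-bit window covers,
                so the function returns 0.  */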
24167
24168 /* Returns nonzero if the current function contains,
24169 or might contain a far jump. */
24170 static int
24171 thumb_far_jump_used_p (void)
24172 {
24173 rtx_insn *insn;
24174 bool far_jump = false;
24175 unsigned int func_size = 0;
24176
24177 /* If we have already decided that far jumps may be used,
24178 do not bother checking again, and always return true even if
24179 it turns out that they are not being used. Once we have made
24180 the decision that far jumps are present (and that hence the link
24181 register will be pushed onto the stack) we cannot go back on it. */
24182 if (cfun->machine->far_jump_used)
24183 return 1;
24184
24185 /* If this function is not being called from the prologue/epilogue
24186 generation code then it must be being called from the
24187 INITIAL_ELIMINATION_OFFSET macro. */
24188 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24189 {
24190 /* In this case we know that we are being asked about the elimination
24191 of the arg pointer register. If that register is not being used,
24192 then there are no arguments on the stack, and we do not have to
24193 worry that a far jump might force the prologue to push the link
24194 register, changing the stack offsets. In this case we can just
24195 return false, since the presence of far jumps in the function will
24196 not affect stack offsets.
24197
24198 If the arg pointer is live (or if it was live, but has now been
24199 eliminated and so set to dead) then we do have to test to see if
24200 the function might contain a far jump. This test can lead to some
24201 false positives, since before reload is completed the length of
24202 branch instructions is not known, so gcc defaults to returning their
24203 longest length, which in turn sets the far jump attribute to true.
24204
24205 A false positive will not result in bad code being generated, but it
24206 will result in a needless push and pop of the link register. We
24207 hope that this does not occur too often.
24208
24209 If we need doubleword stack alignment this could affect the other
24210 elimination offsets so we can't risk getting it wrong. */
24211 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24212 cfun->machine->arg_pointer_live = 1;
24213 else if (!cfun->machine->arg_pointer_live)
24214 return 0;
24215 }
24216
24217 /* We should not change far_jump_used during or after reload, as there is
24218 no chance to change stack frame layout. */
24219 if (reload_in_progress || reload_completed)
24220 return 0;
24221
24222 /* Check to see if the function contains a branch
24223 insn with the far jump attribute set. */
24224 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24225 {
24226 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24227 {
24228 far_jump = true;
24229 }
24230 func_size += get_attr_length (insn);
24231 }
24232
24233 /* The far_jump attribute will always be true for thumb1 before the
24234 shorten_branch pass, so checking the far_jump attribute before
24235 shorten_branch isn't very useful.
24236
24237 The following heuristic tries to estimate more accurately whether a far
24238 jump may finally be used. The heuristic is very conservative, as there
24239 is no chance to roll back a decision not to use a far jump.
24240
24241 The Thumb1 long branch offset is -2048 to 2046. The worst case is that
24242 each 2-byte insn is associated with a 4-byte constant pool entry. Using
24243 a function size of 2048/3 as the threshold is conservative enough. */
24244 if (far_jump)
24245 {
24246 if ((func_size * 3) >= 2048)
24247 {
24248 /* Record the fact that we have decided that
24249 the function does use far jumps. */
24250 cfun->machine->far_jump_used = 1;
24251 return 1;
24252 }
24253 }
24254
24255 return 0;
24256 }
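/* A worked example of the heuristic above (editorial, using the stated
   worst-case assumption): if the summed insn lengths give func_size == 700
   bytes, then with a 4-byte literal-pool entry per 2-byte insn the function
   could span up to 3 * 700 = 2100 bytes, which exceeds the 2046-byte reach
   of a Thumb-1 branch, so far_jump_used is recorded.  A 600-byte function
   (worst case 1800 bytes) would not trigger the heuristic.  */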
24257
24258 /* Return nonzero if FUNC must be entered in ARM mode. */
24259 static bool
24260 is_called_in_ARM_mode (tree func)
24261 {
24262 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24263
24264 /* Ignore the problem about functions whose address is taken. */
24265 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24266 return true;
24267
24268 #ifdef ARM_PE
24269 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24270 #else
24271 return false;
24272 #endif
24273 }
24274
24275 /* Given the stack offsets and register mask in OFFSETS, decide how
24276 many additional registers to push instead of subtracting a constant
24277 from SP. For epilogues the principle is the same except we use pop.
24278 FOR_PROLOGUE indicates which we're generating. */
24279 static int
24280 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24281 {
24282 HOST_WIDE_INT amount;
24283 unsigned long live_regs_mask = offsets->saved_regs_mask;
24284 /* Extract a mask of the ones we can give to the Thumb's push/pop
24285 instruction. */
24286 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24287 /* Then count how many other high registers will need to be pushed. */
24288 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24289 int n_free, reg_base, size;
24290
24291 if (!for_prologue && frame_pointer_needed)
24292 amount = offsets->locals_base - offsets->saved_regs;
24293 else
24294 amount = offsets->outgoing_args - offsets->saved_regs;
24295
24296 /* If the stack frame size is 512 exactly, we can save one load
24297 instruction, which should make this a win even when optimizing
24298 for speed. */
24299 if (!optimize_size && amount != 512)
24300 return 0;
24301
24302 /* Can't do this if there are high registers to push. */
24303 if (high_regs_pushed != 0)
24304 return 0;
24305
24306 /* Shouldn't do it in the prologue if no registers would normally
24307 be pushed at all. In the epilogue, also allow it if we'll have
24308 a pop insn for the PC. */
24309 if (l_mask == 0
24310 && (for_prologue
24311 || TARGET_BACKTRACE
24312 || (live_regs_mask & 1 << LR_REGNUM) == 0
24313 || TARGET_INTERWORK
24314 || crtl->args.pretend_args_size != 0))
24315 return 0;
24316
24317 /* Don't do this if thumb_expand_prologue wants to emit instructions
24318 between the push and the stack frame allocation. */
24319 if (for_prologue
24320 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24321 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24322 return 0;
24323
24324 reg_base = 0;
24325 n_free = 0;
24326 if (!for_prologue)
24327 {
24328 size = arm_size_return_regs ();
24329 reg_base = ARM_NUM_INTS (size);
24330 live_regs_mask >>= reg_base;
24331 }
24332
24333 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24334 && (for_prologue || call_used_regs[reg_base + n_free]))
24335 {
24336 live_regs_mask >>= 1;
24337 n_free++;
24338 }
24339
24340 if (n_free == 0)
24341 return 0;
24342 gcc_assert (amount / 4 * 4 == amount);
24343
24344 if (amount >= 512 && (amount - n_free * 4) < 512)
24345 return (amount - 508) / 4;
24346 if (amount <= n_free * 4)
24347 return amount / 4;
24348 return 0;
24349 }
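/* Worked example (editorial): suppose the prologue needs to drop the stack
   by AMOUNT == 512 bytes and one suitable low register is free.  A Thumb-1
   "sub sp, #imm" only reaches 508, so 512 would otherwise need an extra
   instruction; pushing additional registers instead leaves
   (512 - 508) / 4 == 1, i.e. one extra push, after which the remaining
   adjustment of 508 fits in a single instruction.  */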
24350
24351 /* The bits which aren't usefully expanded as rtl. */
24352 const char *
24353 thumb1_unexpanded_epilogue (void)
24354 {
24355 arm_stack_offsets *offsets;
24356 int regno;
24357 unsigned long live_regs_mask = 0;
24358 int high_regs_pushed = 0;
24359 int extra_pop;
24360 int had_to_push_lr;
24361 int size;
24362
24363 if (cfun->machine->return_used_this_function != 0)
24364 return "";
24365
24366 if (IS_NAKED (arm_current_func_type ()))
24367 return "";
24368
24369 offsets = arm_get_frame_offsets ();
24370 live_regs_mask = offsets->saved_regs_mask;
24371 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24372
24373 /* We can deduce the registers used from the function's return value.
24374 This is more reliable than examining df_regs_ever_live_p () because that
24375 will be set if the register is ever used in the function, not just if
24376 the register is used to hold a return value. */
24377 size = arm_size_return_regs ();
24378
24379 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24380 if (extra_pop > 0)
24381 {
24382 unsigned long extra_mask = (1 << extra_pop) - 1;
24383 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24384 }
24385
24386 /* The prolog may have pushed some high registers to use as
24387 work registers, e.g. the testsuite file:
24388 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24389 compiles to produce:
24390 push {r4, r5, r6, r7, lr}
24391 mov r7, r9
24392 mov r6, r8
24393 push {r6, r7}
24394 as part of the prolog. We have to undo that pushing here. */
24395
24396 if (high_regs_pushed)
24397 {
24398 unsigned long mask = live_regs_mask & 0xff;
24399 int next_hi_reg;
24400
24401 /* The available low registers depend on the size of the value we are
24402 returning. */
24403 if (size <= 12)
24404 mask |= 1 << 3;
24405 if (size <= 8)
24406 mask |= 1 << 2;
24407
24408 if (mask == 0)
24409 /* Oh dear! We have no low registers into which we can pop
24410 high registers! */
24411 internal_error
24412 ("no low registers available for popping high registers");
24413
24414 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24415 if (live_regs_mask & (1 << next_hi_reg))
24416 break;
24417
24418 while (high_regs_pushed)
24419 {
24420 /* Find lo register(s) into which the high register(s) can
24421 be popped. */
24422 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24423 {
24424 if (mask & (1 << regno))
24425 high_regs_pushed--;
24426 if (high_regs_pushed == 0)
24427 break;
24428 }
24429
24430 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24431
24432 /* Pop the values into the low register(s). */
24433 thumb_pop (asm_out_file, mask);
24434
24435 /* Move the value(s) into the high registers. */
24436 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24437 {
24438 if (mask & (1 << regno))
24439 {
24440 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24441 regno);
24442
24443 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24444 if (live_regs_mask & (1 << next_hi_reg))
24445 break;
24446 }
24447 }
24448 }
24449 live_regs_mask &= ~0x0f00;
24450 }
24451
24452 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24453 live_regs_mask &= 0xff;
24454
24455 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24456 {
24457 /* Pop the return address into the PC. */
24458 if (had_to_push_lr)
24459 live_regs_mask |= 1 << PC_REGNUM;
24460
24461 /* Either no argument registers were pushed or a backtrace
24462 structure was created which includes an adjusted stack
24463 pointer, so just pop everything. */
24464 if (live_regs_mask)
24465 thumb_pop (asm_out_file, live_regs_mask);
24466
24467 /* We have either just popped the return address into the
24468 PC or it was kept in LR for the entire function.
24469 Note that thumb_pop has already called thumb_exit if the
24470 PC was in the list. */
24471 if (!had_to_push_lr)
24472 thumb_exit (asm_out_file, LR_REGNUM);
24473 }
24474 else
24475 {
24476 /* Pop everything but the return address. */
24477 if (live_regs_mask)
24478 thumb_pop (asm_out_file, live_regs_mask);
24479
24480 if (had_to_push_lr)
24481 {
24482 if (size > 12)
24483 {
24484 /* We have no free low regs, so save one. */
24485 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24486 LAST_ARG_REGNUM);
24487 }
24488
24489 /* Get the return address into a temporary register. */
24490 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24491
24492 if (size > 12)
24493 {
24494 /* Move the return address to lr. */
24495 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24496 LAST_ARG_REGNUM);
24497 /* Restore the low register. */
24498 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24499 IP_REGNUM);
24500 regno = LR_REGNUM;
24501 }
24502 else
24503 regno = LAST_ARG_REGNUM;
24504 }
24505 else
24506 regno = LR_REGNUM;
24507
24508 /* Remove the argument registers that were pushed onto the stack. */
24509 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24510 SP_REGNUM, SP_REGNUM,
24511 crtl->args.pretend_args_size);
24512
24513 thumb_exit (asm_out_file, regno);
24514 }
24515
24516 return "";
24517 }
24518
24519 /* Functions to save and restore machine-specific function data. */
24520 static struct machine_function *
24521 arm_init_machine_status (void)
24522 {
24523 struct machine_function *machine;
24524 machine = ggc_cleared_alloc<machine_function> ();
24525
24526 #if ARM_FT_UNKNOWN != 0
24527 machine->func_type = ARM_FT_UNKNOWN;
24528 #endif
24529 return machine;
24530 }
24531
24532 /* Return an RTX indicating where the return address to the
24533 calling function can be found. */
24534 rtx
24535 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24536 {
24537 if (count != 0)
24538 return NULL_RTX;
24539
24540 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24541 }
24542
24543 /* Do anything needed before RTL is emitted for each function. */
24544 void
24545 arm_init_expanders (void)
24546 {
24547 /* Arrange to initialize and mark the machine per-function status. */
24548 init_machine_status = arm_init_machine_status;
24549
24550 /* This is to stop the combine pass optimizing away the alignment
24551 adjustment of va_arg. */
24552 /* ??? It is claimed that this should not be necessary. */
24553 if (cfun)
24554 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24555 }
24556
24557 /* Check whether FUNC is compiled in a different mode (ARM vs. Thumb) from the current mode. */
24558
24559 bool
24560 arm_change_mode_p (tree func)
24561 {
24562 if (TREE_CODE (func) != FUNCTION_DECL)
24563 return false;
24564
24565 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24566
24567 if (!callee_tree)
24568 callee_tree = target_option_default_node;
24569
24570 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24571 int flags = callee_opts->x_target_flags;
24572
24573 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24574 }
24575
24576 /* Like arm_compute_initial_elimination_offset. Simpler because there
24577 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24578 to point at the base of the local variables after static stack
24579 space for a function has been allocated. */
24580
24581 HOST_WIDE_INT
24582 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24583 {
24584 arm_stack_offsets *offsets;
24585
24586 offsets = arm_get_frame_offsets ();
24587
24588 switch (from)
24589 {
24590 case ARG_POINTER_REGNUM:
24591 switch (to)
24592 {
24593 case STACK_POINTER_REGNUM:
24594 return offsets->outgoing_args - offsets->saved_args;
24595
24596 case FRAME_POINTER_REGNUM:
24597 return offsets->soft_frame - offsets->saved_args;
24598
24599 case ARM_HARD_FRAME_POINTER_REGNUM:
24600 return offsets->saved_regs - offsets->saved_args;
24601
24602 case THUMB_HARD_FRAME_POINTER_REGNUM:
24603 return offsets->locals_base - offsets->saved_args;
24604
24605 default:
24606 gcc_unreachable ();
24607 }
24608 break;
24609
24610 case FRAME_POINTER_REGNUM:
24611 switch (to)
24612 {
24613 case STACK_POINTER_REGNUM:
24614 return offsets->outgoing_args - offsets->soft_frame;
24615
24616 case ARM_HARD_FRAME_POINTER_REGNUM:
24617 return offsets->saved_regs - offsets->soft_frame;
24618
24619 case THUMB_HARD_FRAME_POINTER_REGNUM:
24620 return offsets->locals_base - offsets->soft_frame;
24621
24622 default:
24623 gcc_unreachable ();
24624 }
24625 break;
24626
24627 default:
24628 gcc_unreachable ();
24629 }
24630 }
24631
24632 /* Generate the function's prologue. */
24633
24634 void
24635 thumb1_expand_prologue (void)
24636 {
24637 rtx_insn *insn;
24638
24639 HOST_WIDE_INT amount;
24640 HOST_WIDE_INT size;
24641 arm_stack_offsets *offsets;
24642 unsigned long func_type;
24643 int regno;
24644 unsigned long live_regs_mask;
24645 unsigned long l_mask;
24646 unsigned high_regs_pushed = 0;
24647 bool lr_needs_saving;
24648
24649 func_type = arm_current_func_type ();
24650
24651 /* Naked functions don't have prologues. */
24652 if (IS_NAKED (func_type))
24653 {
24654 if (flag_stack_usage_info)
24655 current_function_static_stack_size = 0;
24656 return;
24657 }
24658
24659 if (IS_INTERRUPT (func_type))
24660 {
24661 error ("interrupt Service Routines cannot be coded in Thumb mode");
24662 return;
24663 }
24664
24665 if (is_called_in_ARM_mode (current_function_decl))
24666 emit_insn (gen_prologue_thumb1_interwork ());
24667
24668 offsets = arm_get_frame_offsets ();
24669 live_regs_mask = offsets->saved_regs_mask;
24670 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24671
24672 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24673 l_mask = live_regs_mask & 0x40ff;
24674 /* Then count how many other high registers will need to be pushed. */
24675 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24676
24677 if (crtl->args.pretend_args_size)
24678 {
24679 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24680
24681 if (cfun->machine->uses_anonymous_args)
24682 {
24683 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24684 unsigned long mask;
24685
24686 mask = 1ul << (LAST_ARG_REGNUM + 1);
24687 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24688
24689 insn = thumb1_emit_multi_reg_push (mask, 0);
24690 }
24691 else
24692 {
24693 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24694 stack_pointer_rtx, x));
24695 }
24696 RTX_FRAME_RELATED_P (insn) = 1;
24697 }
24698
24699 if (TARGET_BACKTRACE)
24700 {
24701 HOST_WIDE_INT offset = 0;
24702 unsigned work_register;
24703 rtx work_reg, x, arm_hfp_rtx;
24704
24705 /* We have been asked to create a stack backtrace structure.
24706 The code looks like this:
24707
24708 0 .align 2
24709 0 func:
24710 0 sub SP, #16 Reserve space for 4 registers.
24711 2 push {R7} Push low registers.
24712 4 add R7, SP, #20 Get the stack pointer before the push.
24713 6 str R7, [SP, #8] Store the stack pointer
24714 (before reserving the space).
24715 8 mov R7, PC Get hold of the start of this code + 12.
24716 10 str R7, [SP, #16] Store it.
24717 12 mov R7, FP Get hold of the current frame pointer.
24718 14 str R7, [SP, #4] Store it.
24719 16 mov R7, LR Get hold of the current return address.
24720 18 str R7, [SP, #12] Store it.
24721 20 add R7, SP, #16 Point at the start of the
24722 backtrace structure.
24723 22 mov FP, R7 Put this value into the frame pointer. */
24724
24725 work_register = thumb_find_work_register (live_regs_mask);
24726 work_reg = gen_rtx_REG (SImode, work_register);
24727 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24728
24729 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24730 stack_pointer_rtx, GEN_INT (-16)));
24731 RTX_FRAME_RELATED_P (insn) = 1;
24732
24733 if (l_mask)
24734 {
24735 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24736 RTX_FRAME_RELATED_P (insn) = 1;
24737 lr_needs_saving = false;
24738
24739 offset = bit_count (l_mask) * UNITS_PER_WORD;
24740 }
24741
24742 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24743 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24744
24745 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24746 x = gen_frame_mem (SImode, x);
24747 emit_move_insn (x, work_reg);
24748
24749 /* Make sure that the instruction fetching the PC is in the right place
24750 to calculate "start of backtrace creation code + 12". */
24751 /* ??? The stores using the common WORK_REG ought to be enough to
24752 prevent the scheduler from doing anything weird. Failing that
24753 we could always move all of the following into an UNSPEC_VOLATILE. */
24754 if (l_mask)
24755 {
24756 x = gen_rtx_REG (SImode, PC_REGNUM);
24757 emit_move_insn (work_reg, x);
24758
24759 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24760 x = gen_frame_mem (SImode, x);
24761 emit_move_insn (x, work_reg);
24762
24763 emit_move_insn (work_reg, arm_hfp_rtx);
24764
24765 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24766 x = gen_frame_mem (SImode, x);
24767 emit_move_insn (x, work_reg);
24768 }
24769 else
24770 {
24771 emit_move_insn (work_reg, arm_hfp_rtx);
24772
24773 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24774 x = gen_frame_mem (SImode, x);
24775 emit_move_insn (x, work_reg);
24776
24777 x = gen_rtx_REG (SImode, PC_REGNUM);
24778 emit_move_insn (work_reg, x);
24779
24780 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24781 x = gen_frame_mem (SImode, x);
24782 emit_move_insn (x, work_reg);
24783 }
24784
24785 x = gen_rtx_REG (SImode, LR_REGNUM);
24786 emit_move_insn (work_reg, x);
24787
24788 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24789 x = gen_frame_mem (SImode, x);
24790 emit_move_insn (x, work_reg);
24791
24792 x = GEN_INT (offset + 12);
24793 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24794
24795 emit_move_insn (arm_hfp_rtx, work_reg);
24796 }
24797 /* Optimization: If we are not pushing any low registers but we are going
24798 to push some high registers then delay our first push. This will just
24799 be a push of LR and we can combine it with the push of the first high
24800 register. */
24801 else if ((l_mask & 0xff) != 0
24802 || (high_regs_pushed == 0 && lr_needs_saving))
24803 {
24804 unsigned long mask = l_mask;
24805 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24806 insn = thumb1_emit_multi_reg_push (mask, mask);
24807 RTX_FRAME_RELATED_P (insn) = 1;
24808 lr_needs_saving = false;
24809 }
24810
24811 if (high_regs_pushed)
24812 {
24813 unsigned pushable_regs;
24814 unsigned next_hi_reg;
24815 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24816 : crtl->args.info.nregs;
24817 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24818
24819 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24820 if (live_regs_mask & (1 << next_hi_reg))
24821 break;
24822
24823 /* Here we need to mask out registers used for passing arguments
24824 even if they can be pushed. This is to avoid using them to stash the high
24825 registers; such a stash could clobber arguments that are still live in them. */
24826 pushable_regs = l_mask & (~arg_regs_mask);
24827 if (lr_needs_saving)
24828 pushable_regs &= ~(1 << LR_REGNUM);
24829
24830 if (pushable_regs == 0)
24831 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24832
24833 while (high_regs_pushed > 0)
24834 {
24835 unsigned long real_regs_mask = 0;
24836 unsigned long push_mask = 0;
24837
24838 for (regno = LR_REGNUM; regno >= 0; regno --)
24839 {
24840 if (pushable_regs & (1 << regno))
24841 {
24842 emit_move_insn (gen_rtx_REG (SImode, regno),
24843 gen_rtx_REG (SImode, next_hi_reg));
24844
24845 high_regs_pushed --;
24846 real_regs_mask |= (1 << next_hi_reg);
24847 push_mask |= (1 << regno);
24848
24849 if (high_regs_pushed)
24850 {
24851 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24852 next_hi_reg --)
24853 if (live_regs_mask & (1 << next_hi_reg))
24854 break;
24855 }
24856 else
24857 break;
24858 }
24859 }
24860
24861 /* If we had to find a work register and we have not yet
24862 saved the LR then add it to the list of regs to push. */
24863 if (lr_needs_saving)
24864 {
24865 push_mask |= 1 << LR_REGNUM;
24866 real_regs_mask |= 1 << LR_REGNUM;
24867 lr_needs_saving = false;
24868 }
24869
24870 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24871 RTX_FRAME_RELATED_P (insn) = 1;
24872 }
24873 }
24874
24875 /* Load the pic register before setting the frame pointer,
24876 so we can use r7 as a temporary work register. */
24877 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24878 arm_load_pic_register (live_regs_mask);
24879
24880 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24881 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24882 stack_pointer_rtx);
24883
24884 size = offsets->outgoing_args - offsets->saved_args;
24885 if (flag_stack_usage_info)
24886 current_function_static_stack_size = size;
24887
24888 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24889 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24890 sorry ("-fstack-check=specific for Thumb-1");
24891
24892 amount = offsets->outgoing_args - offsets->saved_regs;
24893 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24894 if (amount)
24895 {
24896 if (amount < 512)
24897 {
24898 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24899 GEN_INT (- amount)));
24900 RTX_FRAME_RELATED_P (insn) = 1;
24901 }
24902 else
24903 {
24904 rtx reg, dwarf;
24905
24906 /* The stack decrement is too big for an immediate value in a single
24907 insn. In theory we could issue multiple subtracts, but after
24908 three of them it becomes more space efficient to place the full
24909 value in the constant pool and load into a register. (Also the
24910 ARM debugger really likes to see only one stack decrement per
24911 function). So instead we look for a scratch register into which
24912 we can load the decrement, and then we subtract this from the
24913 stack pointer. Unfortunately on the thumb the only available
24914 scratch registers are the argument registers, and we cannot use
24915 these as they may hold arguments to the function. Instead we
24916 attempt to locate a call preserved register which is used by this
24917 function. If we can find one, then we know that it will have
24918 been pushed at the start of the prologue and so we can corrupt
24919 it now. */
24920 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24921 if (live_regs_mask & (1 << regno))
24922 break;
24923
24924 gcc_assert(regno <= LAST_LO_REGNUM);
24925
24926 reg = gen_rtx_REG (SImode, regno);
24927
24928 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24929
24930 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24931 stack_pointer_rtx, reg));
24932
24933 dwarf = gen_rtx_SET (stack_pointer_rtx,
24934 plus_constant (Pmode, stack_pointer_rtx,
24935 -amount));
24936 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24937 RTX_FRAME_RELATED_P (insn) = 1;
24938 }
24939 }
24940
24941 if (frame_pointer_needed)
24942 thumb_set_frame_pointer (offsets);
24943
24944 /* If we are profiling, make sure no instructions are scheduled before
24945 the call to mcount. Similarly if the user has requested no
24946 scheduling in the prolog. Similarly if we want non-call exceptions
24947 using the EABI unwinder, to prevent faulting instructions from being
24948 swapped with a stack adjustment. */
24949 if (crtl->profile || !TARGET_SCHED_PROLOG
24950 || (arm_except_unwind_info (&global_options) == UI_TARGET
24951 && cfun->can_throw_non_call_exceptions))
24952 emit_insn (gen_blockage ());
24953
24954 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24955 if (live_regs_mask & 0xff)
24956 cfun->machine->lr_save_eliminated = 0;
24957 }
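/* Editorial sketch of the large-frame path described above: for a frame
   that needs, say, a 1024-byte decrement while saving r4 (a hypothetical
   example), the emitted sequence is conceptually

       push  {r4, lr}
       ldr   r4, =-1024     @ load the decrement into a saved low register
       add   sp, sp, r4

   with a REG_FRAME_RELATED_EXPR note describing the net stack-pointer
   adjustment; r4's original value is restored by the epilogue pop.  */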
24958
24959 /* Before exiting a cmse_nonsecure_entry function, clear the caller-saved
24960 registers not used to pass return values, and clear leaked condition flags. */
24961
24962 void
24963 cmse_nonsecure_entry_clear_before_return (void)
24964 {
24965 uint64_t to_clear_mask[2];
24966 uint32_t padding_bits_to_clear = 0;
24967 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24968 int regno, maxregno = IP_REGNUM;
24969 tree result_type;
24970 rtx result_rtl;
24971
24972 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
24973 to_clear_mask[0] |= (1ULL << IP_REGNUM);
24974
24975 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24976 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24977 to make sure the instructions used to clear them are present. */
24978 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
24979 {
24980 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
24981 maxregno = LAST_VFP_REGNUM;
24982
24983 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
24984 to_clear_mask[0] |= float_mask;
24985
24986 float_mask = (1ULL << (maxregno - 63)) - 1;
24987 to_clear_mask[1] = float_mask;
24988
24989 /* Make sure we don't clear the two scratch registers used to clear the
24990 relevant FPSCR bits in output_return_instruction. */
24991 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
24992 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
24993 emit_use (gen_rtx_REG (SImode, 4));
24994 to_clear_mask[0] &= ~(1ULL << 4);
24995 }
24996
24997 /* If the user has defined registers to be caller saved, these are no longer
24998 restored by the function before returning and must thus be cleared for
24999 security purposes. */
25000 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25001 {
25002 /* We do not touch registers that can be used to pass arguments as per
25003 the AAPCS, since these should never be made callee-saved by user
25004 options. */
25005 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25006 continue;
25007 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25008 continue;
25009 if (call_used_regs[regno])
25010 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25011 }
25012
25013 /* Make sure we do not clear the registers used to return the result in. */
25014 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25015 if (!VOID_TYPE_P (result_type))
25016 {
25017 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25018
25019 /* No need to check that we return in registers, because we don't
25020 support returning on stack yet. */
25021 to_clear_mask[0]
25022 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25023 padding_bits_to_clear_ptr);
25024 }
25025
25026 if (padding_bits_to_clear != 0)
25027 {
25028 rtx reg_rtx;
25029 /* The padding bits to clear are not 0, so we know we are dealing with
25030 returning a composite type, which only uses r0. Let's make sure that
25031 r1-r3 are cleared too; we will use r1 as a scratch register. */
25032 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25033
25034 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25035
25036 /* Fill the lower half of the negated padding_bits_to_clear. */
25037 emit_move_insn (reg_rtx,
25038 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25039
25040 /* Also fill the top half of the negated padding_bits_to_clear. */
25041 if (((~padding_bits_to_clear) >> 16) > 0)
25042 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25043 GEN_INT (16),
25044 GEN_INT (16)),
25045 GEN_INT ((~padding_bits_to_clear) >> 16)));
25046
25047 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25048 gen_rtx_REG (SImode, R0_REGNUM),
25049 reg_rtx));
25050 }
25051
25052 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25053 {
25054 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25055 continue;
25056
25057 if (IS_VFP_REGNUM (regno))
25058 {
25059 /* If regno is an even vfp register and its successor is also to
25060 be cleared, use vmov. */
25061 if (TARGET_VFP_DOUBLE
25062 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25063 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25064 {
25065 emit_move_insn (gen_rtx_REG (DFmode, regno),
25066 CONST1_RTX (DFmode));
25067 emit_use (gen_rtx_REG (DFmode, regno));
25068 regno++;
25069 }
25070 else
25071 {
25072 emit_move_insn (gen_rtx_REG (SFmode, regno),
25073 CONST1_RTX (SFmode));
25074 emit_use (gen_rtx_REG (SFmode, regno));
25075 }
25076 }
25077 else
25078 {
25079 if (TARGET_THUMB1)
25080 {
25081 if (regno == R0_REGNUM)
25082 emit_move_insn (gen_rtx_REG (SImode, regno),
25083 const0_rtx);
25084 else
25085 /* R0 has either been cleared before, see code above, or it
25086 holds a return value, either way it is not secret
25087 information. */
25088 emit_move_insn (gen_rtx_REG (SImode, regno),
25089 gen_rtx_REG (SImode, R0_REGNUM));
25090 emit_use (gen_rtx_REG (SImode, regno));
25091 }
25092 else
25093 {
25094 emit_move_insn (gen_rtx_REG (SImode, regno),
25095 gen_rtx_REG (SImode, LR_REGNUM));
25096 emit_use (gen_rtx_REG (SImode, regno));
25097 }
25098 }
25099 }
25100 }
25101
25102 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25103 single POP instruction can be generated. LR should be replaced by PC.
25104 All the checks required are already done by USE_RETURN_INSN (). Hence,
25105 all we really need to check here is whether a single register or
25106 multiple registers are to be returned. */
25107 void
25108 thumb2_expand_return (bool simple_return)
25109 {
25110 int i, num_regs;
25111 unsigned long saved_regs_mask;
25112 arm_stack_offsets *offsets;
25113
25114 offsets = arm_get_frame_offsets ();
25115 saved_regs_mask = offsets->saved_regs_mask;
25116
25117 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25118 if (saved_regs_mask & (1 << i))
25119 num_regs++;
25120
25121 if (!simple_return && saved_regs_mask)
25122 {
25123 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25124 functions or adapt code to handle according to ACLE. This path should
25125 not be reachable for cmse_nonsecure_entry functions though we prefer
25126 to assert it for now to ensure that future code changes do not silently
25127 change this behavior. */
25128 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25129 if (num_regs == 1)
25130 {
25131 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25132 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25133 rtx addr = gen_rtx_MEM (SImode,
25134 gen_rtx_POST_INC (SImode,
25135 stack_pointer_rtx));
25136 set_mem_alias_set (addr, get_frame_alias_set ());
25137 XVECEXP (par, 0, 0) = ret_rtx;
25138 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25139 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25140 emit_jump_insn (par);
25141 }
25142 else
25143 {
25144 saved_regs_mask &= ~ (1 << LR_REGNUM);
25145 saved_regs_mask |= (1 << PC_REGNUM);
25146 arm_emit_multi_reg_pop (saved_regs_mask);
25147 }
25148 }
25149 else
25150 {
25151 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25152 cmse_nonsecure_entry_clear_before_return ();
25153 emit_jump_insn (simple_return_rtx);
25154 }
25155 }
25156
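      /* Expand the epilogue of a Thumb-1 function: deallocate the local frame
	 (restoring the stack pointer from the frame pointer when one is used)
	 and emit the uses/clobbers needed so that the stack adjustment is not
	 deleted and register lifetimes are tracked correctly.  For
	 cmse_nonsecure_entry functions, also clear the caller-saved registers
	 that are not used to return.  */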
25157 void
25158 thumb1_expand_epilogue (void)
25159 {
25160 HOST_WIDE_INT amount;
25161 arm_stack_offsets *offsets;
25162 int regno;
25163
25164 /* Naked functions don't have prologues. */
25165 if (IS_NAKED (arm_current_func_type ()))
25166 return;
25167
25168 offsets = arm_get_frame_offsets ();
25169 amount = offsets->outgoing_args - offsets->saved_regs;
25170
25171 if (frame_pointer_needed)
25172 {
25173 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25174 amount = offsets->locals_base - offsets->saved_regs;
25175 }
25176 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25177
25178 gcc_assert (amount >= 0);
25179 if (amount)
25180 {
25181 emit_insn (gen_blockage ());
25182
25183 if (amount < 512)
25184 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25185 GEN_INT (amount)));
25186 else
25187 {
25188 /* r3 is always free in the epilogue. */
25189 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25190
25191 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25192 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25193 }
25194 }
25195
25196 /* Emit a USE (stack_pointer_rtx), so that
25197 the stack adjustment will not be deleted. */
25198 emit_insn (gen_force_register_use (stack_pointer_rtx));
25199
25200 if (crtl->profile || !TARGET_SCHED_PROLOG)
25201 emit_insn (gen_blockage ());
25202
25203 /* Emit a clobber for each register that will be restored in the epilogue,
25204 so that flow2 will get register lifetimes correct. */
25205 for (regno = 0; regno < 13; regno++)
25206 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25207 emit_clobber (gen_rtx_REG (SImode, regno));
25208
25209 if (! df_regs_ever_live_p (LR_REGNUM))
25210 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25211
25212 /* Clear all caller-saved regs that are not used to return. */
25213 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25214 cmse_nonsecure_entry_clear_before_return ();
25215 }
25216
25217 /* Epilogue code for APCS frame. */
25218 static void
25219 arm_expand_epilogue_apcs_frame (bool really_return)
25220 {
25221 unsigned long func_type;
25222 unsigned long saved_regs_mask;
25223 int num_regs = 0;
25224 int i;
25225 int floats_from_frame = 0;
25226 arm_stack_offsets *offsets;
25227
25228 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25229 func_type = arm_current_func_type ();
25230
25231 /* Get frame offsets for ARM. */
25232 offsets = arm_get_frame_offsets ();
25233 saved_regs_mask = offsets->saved_regs_mask;
25234
25235 /* Find the offset of the floating-point save area in the frame. */
25236 floats_from_frame
25237 = (offsets->saved_args
25238 + arm_compute_static_chain_stack_bytes ()
25239 - offsets->frame);
25240
25241 /* Compute how many core registers are saved and how far away the floats are. */
25242 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25243 if (saved_regs_mask & (1 << i))
25244 {
25245 num_regs++;
25246 floats_from_frame += 4;
25247 }
25248
25249 if (TARGET_HARD_FLOAT)
25250 {
25251 int start_reg;
25252 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25253
25254 /* The offset is from IP_REGNUM. */
25255 int saved_size = arm_get_vfp_saved_size ();
25256 if (saved_size > 0)
25257 {
25258 rtx_insn *insn;
25259 floats_from_frame += saved_size;
25260 insn = emit_insn (gen_addsi3 (ip_rtx,
25261 hard_frame_pointer_rtx,
25262 GEN_INT (-floats_from_frame)));
25263 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25264 ip_rtx, hard_frame_pointer_rtx);
25265 }
25266
25267 /* Generate VFP register multi-pop. */
25268 start_reg = FIRST_VFP_REGNUM;
25269
25270 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25271 /* Look for a case where a reg does not need restoring. */
25272 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25273 && (!df_regs_ever_live_p (i + 1)
25274 || call_used_regs[i + 1]))
25275 {
25276 if (start_reg != i)
25277 arm_emit_vfp_multi_reg_pop (start_reg,
25278 (i - start_reg) / 2,
25279 gen_rtx_REG (SImode,
25280 IP_REGNUM));
25281 start_reg = i + 2;
25282 }
25283
25284 /* Restore the remaining regs that we have discovered (or possibly
25285 even all of them, if the conditional in the for loop never
25286 fired). */
25287 if (start_reg != i)
25288 arm_emit_vfp_multi_reg_pop (start_reg,
25289 (i - start_reg) / 2,
25290 gen_rtx_REG (SImode, IP_REGNUM));
25291 }
25292
25293 if (TARGET_IWMMXT)
25294 {
25295 /* The frame pointer is guaranteed to be non-double-word aligned, as
25296 it is set to double-word-aligned old_stack_pointer - 4. */
25297 rtx_insn *insn;
25298 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25299
25300 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25301 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25302 {
25303 rtx addr = gen_frame_mem (V2SImode,
25304 plus_constant (Pmode, hard_frame_pointer_rtx,
25305 - lrm_count * 4));
25306 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25307 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25308 gen_rtx_REG (V2SImode, i),
25309 NULL_RTX);
25310 lrm_count += 2;
25311 }
25312 }
25313
25314 /* saved_regs_mask should contain IP, which holds the old stack pointer
25315 from the time the activation record was created. Since SP and IP are
25316 adjacent registers, we can restore the value directly into SP. */
25317 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25318 saved_regs_mask &= ~(1 << IP_REGNUM);
25319 saved_regs_mask |= (1 << SP_REGNUM);
25320
25321 /* There are two registers left in saved_regs_mask - LR and PC. We
25322 only need to restore LR (the return address), but to
25323 save time we can load it directly into PC, unless we need a
25324 special function exit sequence, or we are not really returning. */
25325 if (really_return
25326 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25327 && !crtl->calls_eh_return)
25328 /* Delete LR from the register mask, so that LR on
25329 the stack is loaded into the PC in the register mask. */
25330 saved_regs_mask &= ~(1 << LR_REGNUM);
25331 else
25332 saved_regs_mask &= ~(1 << PC_REGNUM);
25333
25334 num_regs = bit_count (saved_regs_mask);
25335 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25336 {
25337 rtx_insn *insn;
25338 emit_insn (gen_blockage ());
25339 /* Unwind the stack to just below the saved registers. */
25340 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25341 hard_frame_pointer_rtx,
25342 GEN_INT (- 4 * num_regs)));
25343
25344 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25345 stack_pointer_rtx, hard_frame_pointer_rtx);
25346 }
25347
25348 arm_emit_multi_reg_pop (saved_regs_mask);
25349
25350 if (IS_INTERRUPT (func_type))
25351 {
25352 /* Interrupt handlers will have pushed the
25353 IP onto the stack, so restore it now. */
25354 rtx_insn *insn;
25355 rtx addr = gen_rtx_MEM (SImode,
25356 gen_rtx_POST_INC (SImode,
25357 stack_pointer_rtx));
25358 set_mem_alias_set (addr, get_frame_alias_set ());
25359 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25360 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25361 gen_rtx_REG (SImode, IP_REGNUM),
25362 NULL_RTX);
25363 }
25364
25365 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25366 return;
25367
25368 if (crtl->calls_eh_return)
25369 emit_insn (gen_addsi3 (stack_pointer_rtx,
25370 stack_pointer_rtx,
25371 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25372
25373 if (IS_STACKALIGN (func_type))
25374 /* Restore the original stack pointer. Before prologue, the stack was
25375 realigned and the original stack pointer saved in r0. For details,
25376 see comment in arm_expand_prologue. */
25377 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25378
25379 emit_jump_insn (simple_return_rtx);
25380 }
25381
25382 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25383 function is not a sibcall. */
25384 void
25385 arm_expand_epilogue (bool really_return)
25386 {
25387 unsigned long func_type;
25388 unsigned long saved_regs_mask;
25389 int num_regs = 0;
25390 int i;
25391 int amount;
25392 arm_stack_offsets *offsets;
25393
25394 func_type = arm_current_func_type ();
25395
25396 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25397 and let output_return_instruction take care of any instruction emission. */
25398 if (IS_NAKED (func_type)
25399 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25400 {
25401 if (really_return)
25402 emit_jump_insn (simple_return_rtx);
25403 return;
25404 }
25405
25406 /* If we are throwing an exception, then we really must be doing a
25407 return, so we can't tail-call. */
25408 gcc_assert (!crtl->calls_eh_return || really_return);
25409
25410 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25411 {
25412 arm_expand_epilogue_apcs_frame (really_return);
25413 return;
25414 }
25415
25416 /* Get frame offsets for ARM. */
25417 offsets = arm_get_frame_offsets ();
25418 saved_regs_mask = offsets->saved_regs_mask;
25419 num_regs = bit_count (saved_regs_mask);
25420
25421 if (frame_pointer_needed)
25422 {
25423 rtx_insn *insn;
25424 /* Restore stack pointer if necessary. */
25425 if (TARGET_ARM)
25426 {
25427 /* In ARM mode, the frame pointer points to the first saved register.
25428 Restore the stack pointer to the last saved register. */
25429 amount = offsets->frame - offsets->saved_regs;
25430
25431 /* Force out any pending memory operations that reference stacked data
25432 before stack de-allocation occurs. */
25433 emit_insn (gen_blockage ());
25434 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25435 hard_frame_pointer_rtx,
25436 GEN_INT (amount)));
25437 arm_add_cfa_adjust_cfa_note (insn, amount,
25438 stack_pointer_rtx,
25439 hard_frame_pointer_rtx);
25440
25441 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25442 deleted. */
25443 emit_insn (gen_force_register_use (stack_pointer_rtx));
25444 }
25445 else
25446 {
25447 /* In Thumb-2 mode, the frame pointer points to the last saved
25448 register. */
25449 amount = offsets->locals_base - offsets->saved_regs;
25450 if (amount)
25451 {
25452 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25453 hard_frame_pointer_rtx,
25454 GEN_INT (amount)));
25455 arm_add_cfa_adjust_cfa_note (insn, amount,
25456 hard_frame_pointer_rtx,
25457 hard_frame_pointer_rtx);
25458 }
25459
25460 /* Force out any pending memory operations that reference stacked data
25461 before stack de-allocation occurs. */
25462 emit_insn (gen_blockage ());
25463 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25464 hard_frame_pointer_rtx));
25465 arm_add_cfa_adjust_cfa_note (insn, 0,
25466 stack_pointer_rtx,
25467 hard_frame_pointer_rtx);
25468 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25469 deleted. */
25470 emit_insn (gen_force_register_use (stack_pointer_rtx));
25471 }
25472 }
25473 else
25474 {
25475 /* Pop off outgoing args and local frame to adjust stack pointer to
25476 last saved register. */
25477 amount = offsets->outgoing_args - offsets->saved_regs;
25478 if (amount)
25479 {
25480 rtx_insn *tmp;
25481 /* Force out any pending memory operations that reference stacked data
25482 before stack de-allocation occurs. */
25483 emit_insn (gen_blockage ());
25484 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25485 stack_pointer_rtx,
25486 GEN_INT (amount)));
25487 arm_add_cfa_adjust_cfa_note (tmp, amount,
25488 stack_pointer_rtx, stack_pointer_rtx);
25489 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25490 not deleted. */
25491 emit_insn (gen_force_register_use (stack_pointer_rtx));
25492 }
25493 }
25494
25495 if (TARGET_HARD_FLOAT)
25496 {
25497 /* Generate VFP register multi-pop. */
25498 int end_reg = LAST_VFP_REGNUM + 1;
25499
25500 /* Scan the registers in reverse order. We need to match
25501 any groupings made in the prologue and generate matching
25502 vldm operations. We need to match the groups because,
25503 unlike pop, vldm can only do consecutive regs. */
25504 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25505 /* Look for a case where a reg does not need restoring. */
25506 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25507 && (!df_regs_ever_live_p (i + 1)
25508 || call_used_regs[i + 1]))
25509 {
25510 /* Restore the regs discovered so far (from reg+2 to
25511 end_reg). */
25512 if (end_reg > i + 2)
25513 arm_emit_vfp_multi_reg_pop (i + 2,
25514 (end_reg - (i + 2)) / 2,
25515 stack_pointer_rtx);
25516 end_reg = i;
25517 }
25518
25519 /* Restore the remaining regs that we have discovered (or possibly
25520 even all of them, if the conditional in the for loop never
25521 fired). */
25522 if (end_reg > i + 2)
25523 arm_emit_vfp_multi_reg_pop (i + 2,
25524 (end_reg - (i + 2)) / 2,
25525 stack_pointer_rtx);
25526 }
25527
25528 if (TARGET_IWMMXT)
25529 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25530 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25531 {
25532 rtx_insn *insn;
25533 rtx addr = gen_rtx_MEM (V2SImode,
25534 gen_rtx_POST_INC (SImode,
25535 stack_pointer_rtx));
25536 set_mem_alias_set (addr, get_frame_alias_set ());
25537 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25538 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25539 gen_rtx_REG (V2SImode, i),
25540 NULL_RTX);
25541 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25542 stack_pointer_rtx, stack_pointer_rtx);
25543 }
25544
25545 if (saved_regs_mask)
25546 {
25547 rtx insn;
25548 bool return_in_pc = false;
25549
25550 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25551 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25552 && !IS_CMSE_ENTRY (func_type)
25553 && !IS_STACKALIGN (func_type)
25554 && really_return
25555 && crtl->args.pretend_args_size == 0
25556 && saved_regs_mask & (1 << LR_REGNUM)
25557 && !crtl->calls_eh_return)
25558 {
25559 saved_regs_mask &= ~(1 << LR_REGNUM);
25560 saved_regs_mask |= (1 << PC_REGNUM);
25561 return_in_pc = true;
25562 }
25563
25564 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25565 {
25566 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25567 if (saved_regs_mask & (1 << i))
25568 {
25569 rtx addr = gen_rtx_MEM (SImode,
25570 gen_rtx_POST_INC (SImode,
25571 stack_pointer_rtx));
25572 set_mem_alias_set (addr, get_frame_alias_set ());
25573
25574 if (i == PC_REGNUM)
25575 {
25576 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25577 XVECEXP (insn, 0, 0) = ret_rtx;
25578 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25579 addr);
25580 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25581 insn = emit_jump_insn (insn);
25582 }
25583 else
25584 {
25585 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25586 addr));
25587 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25588 gen_rtx_REG (SImode, i),
25589 NULL_RTX);
25590 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25591 stack_pointer_rtx,
25592 stack_pointer_rtx);
25593 }
25594 }
25595 }
25596 else
25597 {
25598 if (TARGET_LDRD
25599 && current_tune->prefer_ldrd_strd
25600 && !optimize_function_for_size_p (cfun))
25601 {
25602 if (TARGET_THUMB2)
25603 thumb2_emit_ldrd_pop (saved_regs_mask);
25604 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25605 arm_emit_ldrd_pop (saved_regs_mask);
25606 else
25607 arm_emit_multi_reg_pop (saved_regs_mask);
25608 }
25609 else
25610 arm_emit_multi_reg_pop (saved_regs_mask);
25611 }
25612
25613 if (return_in_pc)
25614 return;
25615 }
25616
25617 amount
25618 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25619 if (amount)
25620 {
25621 int i, j;
25622 rtx dwarf = NULL_RTX;
25623 rtx_insn *tmp =
25624 emit_insn (gen_addsi3 (stack_pointer_rtx,
25625 stack_pointer_rtx,
25626 GEN_INT (amount)));
25627
25628 RTX_FRAME_RELATED_P (tmp) = 1;
25629
25630 if (cfun->machine->uses_anonymous_args)
25631 {
25632 /* Restore pretend args. Refer to arm_expand_prologue for how
25633 pretend_args are saved on the stack. */
25634 int num_regs = crtl->args.pretend_args_size / 4;
25635 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25636 for (j = 0, i = 0; j < num_regs; i++)
25637 if (saved_regs_mask & (1 << i))
25638 {
25639 rtx reg = gen_rtx_REG (SImode, i);
25640 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25641 j++;
25642 }
25643 REG_NOTES (tmp) = dwarf;
25644 }
25645 arm_add_cfa_adjust_cfa_note (tmp, amount,
25646 stack_pointer_rtx, stack_pointer_rtx);
25647 }
25648
25649 /* Clear all caller-saved regs that are not used to return. */
25650 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25651 {
25652 /* CMSE_ENTRY always returns. */
25653 gcc_assert (really_return);
25654 cmse_nonsecure_entry_clear_before_return ();
25655 }
25656
25657 if (!really_return)
25658 return;
25659
25660 if (crtl->calls_eh_return)
25661 emit_insn (gen_addsi3 (stack_pointer_rtx,
25662 stack_pointer_rtx,
25663 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25664
25665 if (IS_STACKALIGN (func_type))
25666 /* Restore the original stack pointer. Before prologue, the stack was
25667 realigned and the original stack pointer saved in r0. For details,
25668 see comment in arm_expand_prologue. */
25669 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25670
25671 emit_jump_insn (simple_return_rtx);
25672 }
25673
25674 /* Implementation of insn prologue_thumb1_interwork. This is the first
25675 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25676
25677 const char *
25678 thumb1_output_interwork (void)
25679 {
25680 const char * name;
25681 FILE *f = asm_out_file;
25682
25683 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25684 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25685 == SYMBOL_REF);
25686 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25687
25688 /* Generate a code sequence to switch us into Thumb mode. */
25689 /* The .code 32 directive has already been emitted by
25690 ASM_DECLARE_FUNCTION_NAME. */
25691 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25692 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25693
25694 /* Generate a label, so that the debugger will notice the
25695 change in instruction sets. This label is also used by
25696 the assembler to bypass the ARM code when this function
25697 is called from a Thumb encoded function elsewhere in the
25698 same file. Hence the definition of STUB_NAME here must
25699 agree with the definition in gas/config/tc-arm.c. */
25700
25701 #define STUB_NAME ".real_start_of"
25702
25703 fprintf (f, "\t.code\t16\n");
25704 #ifdef ARM_PE
25705 if (arm_dllexport_name_p (name))
25706 name = arm_strip_name_encoding (name);
25707 #endif
25708 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25709 fprintf (f, "\t.thumb_func\n");
25710 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25711
25712 return "";
25713 }
25714
25715 /* Handle the case of a double word load into a low register from
25716 a computed memory address. The computed address may involve a
25717 register which is overwritten by the load. */
25718 const char *
25719 thumb_load_double_from_address (rtx *operands)
25720 {
25721 rtx addr;
25722 rtx base;
25723 rtx offset;
25724 rtx arg1;
25725 rtx arg2;
25726
25727 gcc_assert (REG_P (operands[0]));
25728 gcc_assert (MEM_P (operands[1]));
25729
25730 /* Get the memory address. */
25731 addr = XEXP (operands[1], 0);
25732
25733 /* Work out how the memory address is computed. */
25734 switch (GET_CODE (addr))
25735 {
25736 case REG:
25737 operands[2] = adjust_address (operands[1], SImode, 4);
25738
25739 if (REGNO (operands[0]) == REGNO (addr))
25740 {
25741 output_asm_insn ("ldr\t%H0, %2", operands);
25742 output_asm_insn ("ldr\t%0, %1", operands);
25743 }
25744 else
25745 {
25746 output_asm_insn ("ldr\t%0, %1", operands);
25747 output_asm_insn ("ldr\t%H0, %2", operands);
25748 }
25749 break;
25750
25751 case CONST:
25752 /* Compute <address> + 4 for the high order load. */
25753 operands[2] = adjust_address (operands[1], SImode, 4);
25754
25755 output_asm_insn ("ldr\t%0, %1", operands);
25756 output_asm_insn ("ldr\t%H0, %2", operands);
25757 break;
25758
25759 case PLUS:
25760 arg1 = XEXP (addr, 0);
25761 arg2 = XEXP (addr, 1);
25762
25763 if (CONSTANT_P (arg1))
25764 base = arg2, offset = arg1;
25765 else
25766 base = arg1, offset = arg2;
25767
25768 gcc_assert (REG_P (base));
25769
25770 /* Catch the case of <address> = <reg> + <reg> */
25771 if (REG_P (offset))
25772 {
25773 int reg_offset = REGNO (offset);
25774 int reg_base = REGNO (base);
25775 int reg_dest = REGNO (operands[0]);
25776
25777 /* Add the base and offset registers together into the
25778 higher destination register. */
25779 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25780 reg_dest + 1, reg_base, reg_offset);
25781
25782 /* Load the lower destination register from the address in
25783 the higher destination register. */
25784 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25785 reg_dest, reg_dest + 1);
25786
25787 /* Load the higher destination register from its own address
25788 plus 4. */
25789 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25790 reg_dest + 1, reg_dest + 1);
25791 }
25792 else
25793 {
25794 /* Compute <address> + 4 for the high order load. */
25795 operands[2] = adjust_address (operands[1], SImode, 4);
25796
25797 /* If the computed address is held in the low order register
25798 then load the high order register first, otherwise always
25799 load the low order register first. */
25800 if (REGNO (operands[0]) == REGNO (base))
25801 {
25802 output_asm_insn ("ldr\t%H0, %2", operands);
25803 output_asm_insn ("ldr\t%0, %1", operands);
25804 }
25805 else
25806 {
25807 output_asm_insn ("ldr\t%0, %1", operands);
25808 output_asm_insn ("ldr\t%H0, %2", operands);
25809 }
25810 }
25811 break;
25812
25813 case LABEL_REF:
25814 /* With no registers to worry about we can just load the value
25815 directly. */
25816 operands[2] = adjust_address (operands[1], SImode, 4);
25817
25818 output_asm_insn ("ldr\t%H0, %2", operands);
25819 output_asm_insn ("ldr\t%0, %1", operands);
25820 break;
25821
25822 default:
25823 gcc_unreachable ();
25824 }
25825
25826 return "";
25827 }
25828
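      /* Output the ldmia/stmia pair for a Thumb block move of N words.  The
	 scratch registers in operands[4..] are first sorted into ascending
	 order, as required by the register-list encoding; only N == 2 and
	 N == 3 are handled.  */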
25829 const char *
25830 thumb_output_move_mem_multiple (int n, rtx *operands)
25831 {
25832 switch (n)
25833 {
25834 case 2:
25835 if (REGNO (operands[4]) > REGNO (operands[5]))
25836 std::swap (operands[4], operands[5]);
25837
25838 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25839 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25840 break;
25841
25842 case 3:
25843 if (REGNO (operands[4]) > REGNO (operands[5]))
25844 std::swap (operands[4], operands[5]);
25845 if (REGNO (operands[5]) > REGNO (operands[6]))
25846 std::swap (operands[5], operands[6]);
25847 if (REGNO (operands[4]) > REGNO (operands[5]))
25848 std::swap (operands[4], operands[5]);
25849
25850 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25851 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25852 break;
25853
25854 default:
25855 gcc_unreachable ();
25856 }
25857
25858 return "";
25859 }
25860
25861 /* Output a call-via instruction for thumb state. */
25862 const char *
25863 thumb_call_via_reg (rtx reg)
25864 {
25865 int regno = REGNO (reg);
25866 rtx *labelp;
25867
25868 gcc_assert (regno < LR_REGNUM);
25869
25870 /* If we are in the normal text section we can use a single instance
25871 per compilation unit. If we are doing function sections, then we need
25872 an entry per section, since we can't rely on reachability. */
25873 if (in_section == text_section)
25874 {
25875 thumb_call_reg_needed = 1;
25876
25877 if (thumb_call_via_label[regno] == NULL)
25878 thumb_call_via_label[regno] = gen_label_rtx ();
25879 labelp = thumb_call_via_label + regno;
25880 }
25881 else
25882 {
25883 if (cfun->machine->call_via[regno] == NULL)
25884 cfun->machine->call_via[regno] = gen_label_rtx ();
25885 labelp = cfun->machine->call_via + regno;
25886 }
25887
25888 output_asm_insn ("bl\t%a0", labelp);
25889 return "";
25890 }
25891
25892 /* Routines for generating rtl. */
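      /* Expand a Thumb block copy (movmemqi): copy the constant number of
	 bytes given by operands[2] from the memory in operands[1] to the
	 memory in operands[0], using 12-byte and 8-byte ldm/stm block moves
	 followed by word, halfword and byte copies for any remainder.  */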
25893 void
25894 thumb_expand_movmemqi (rtx *operands)
25895 {
25896 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25897 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25898 HOST_WIDE_INT len = INTVAL (operands[2]);
25899 HOST_WIDE_INT offset = 0;
25900
25901 while (len >= 12)
25902 {
25903 emit_insn (gen_movmem12b (out, in, out, in));
25904 len -= 12;
25905 }
25906
25907 if (len >= 8)
25908 {
25909 emit_insn (gen_movmem8b (out, in, out, in));
25910 len -= 8;
25911 }
25912
25913 if (len >= 4)
25914 {
25915 rtx reg = gen_reg_rtx (SImode);
25916 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25917 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25918 len -= 4;
25919 offset += 4;
25920 }
25921
25922 if (len >= 2)
25923 {
25924 rtx reg = gen_reg_rtx (HImode);
25925 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25926 plus_constant (Pmode, in,
25927 offset))));
25928 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25929 offset)),
25930 reg));
25931 len -= 2;
25932 offset += 2;
25933 }
25934
25935 if (len)
25936 {
25937 rtx reg = gen_reg_rtx (QImode);
25938 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25939 plus_constant (Pmode, in,
25940 offset))));
25941 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25942 offset)),
25943 reg));
25944 }
25945 }
25946
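      /* Reload helper for storing a HImode value: emit a thumb_movhi_clobber
	 insn, which performs the store of operands[1] to operands[0] with
	 operands[2] as a scratch register.  */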
25947 void
25948 thumb_reload_out_hi (rtx *operands)
25949 {
25950 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25951 }
25952
25953 /* Return the length of a function name prefix
25954 that starts with the character 'c'. */
25955 static int
25956 arm_get_strip_length (int c)
25957 {
25958 switch (c)
25959 {
25960 ARM_NAME_ENCODING_LENGTHS
25961 default: return 0;
25962 }
25963 }
25964
25965 /* Return a pointer to a function's name with any
25966 and all prefix encodings stripped from it. */
25967 const char *
25968 arm_strip_name_encoding (const char *name)
25969 {
25970 int skip;
25971
25972 while ((skip = arm_get_strip_length (* name)))
25973 name += skip;
25974
25975 return name;
25976 }
25977
25978 /* If there is a '*' anywhere in the name's prefix, then
25979 emit the stripped name verbatim, otherwise prepend an
25980 underscore if leading underscores are being used. */
25981 void
25982 arm_asm_output_labelref (FILE *stream, const char *name)
25983 {
25984 int skip;
25985 int verbatim = 0;
25986
25987 while ((skip = arm_get_strip_length (* name)))
25988 {
25989 verbatim |= (*name == '*');
25990 name += skip;
25991 }
25992
25993 if (verbatim)
25994 fputs (name, stream);
25995 else
25996 asm_fprintf (stream, "%U%s", name);
25997 }
25998
25999 /* This function is used to emit an EABI tag and its associated value.
26000 We emit the numerical value of the tag in case the assembler does not
26001 support textual tags (e.g. gas prior to 2.20). If requested we include
26002 the tag name in a comment so that anyone reading the assembler output
26003 will know which tag is being set.
26004
26005 This function is not static because arm-c.c needs it too. */
26006
26007 void
26008 arm_emit_eabi_attribute (const char *name, int num, int val)
26009 {
26010 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26011 if (flag_verbose_asm || flag_debug_asm)
26012 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26013 asm_fprintf (asm_out_file, "\n");
26014 }
26015
26016 /* This function is used to print CPU tuning information as comment
26017 in assembler file. Pointers are not printed for now. */
26018
26019 void
26020 arm_print_tune_info (void)
26021 {
26022 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26023 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26024 current_tune->constant_limit);
26025 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26026 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26027 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26028 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26029 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26030 "prefetch.l1_cache_size:\t%d\n",
26031 current_tune->prefetch.l1_cache_size);
26032 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26033 "prefetch.l1_cache_line_size:\t%d\n",
26034 current_tune->prefetch.l1_cache_line_size);
26035 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26036 "prefer_constant_pool:\t%d\n",
26037 (int) current_tune->prefer_constant_pool);
26038 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26039 "branch_cost:\t(s:speed, p:predictable)\n");
26040 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26041 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26042 current_tune->branch_cost (false, false));
26043 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26044 current_tune->branch_cost (false, true));
26045 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26046 current_tune->branch_cost (true, false));
26047 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26048 current_tune->branch_cost (true, true));
26049 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26050 "prefer_ldrd_strd:\t%d\n",
26051 (int) current_tune->prefer_ldrd_strd);
26052 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26053 "logical_op_non_short_circuit:\t[%d,%d]\n",
26054 (int) current_tune->logical_op_non_short_circuit_thumb,
26055 (int) current_tune->logical_op_non_short_circuit_arm);
26056 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26057 "prefer_neon_for_64bits:\t%d\n",
26058 (int) current_tune->prefer_neon_for_64bits);
26059 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26060 "disparage_flag_setting_t16_encodings:\t%d\n",
26061 (int) current_tune->disparage_flag_setting_t16_encodings);
26062 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26063 "string_ops_prefer_neon:\t%d\n",
26064 (int) current_tune->string_ops_prefer_neon);
26065 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26066 "max_insns_inline_memset:\t%d\n",
26067 current_tune->max_insns_inline_memset);
26068 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26069 current_tune->fusible_ops);
26070 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26071 (int) current_tune->sched_autopref);
26072 }
26073
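      /* File-start hook: for BPABI (EABI) targets, emit the .cpu/.arch
	 directives, optionally the tuning information comment, and the EABI
	 build attributes describing the floating-point and ABI configuration,
	 before handing over to default_file_start.  */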
26074 static void
26075 arm_file_start (void)
26076 {
26077 int val;
26078
26079 if (TARGET_BPABI)
26080 {
26081 /* We don't have a specified CPU. Use the architecture to
26082 generate the tags.
26083
26084 Note: it might be better to do this unconditionally, then the
26085 assembler would not need to know about all new CPU names as
26086 they are added. */
26087 if (!arm_active_target.core_name)
26088 {
26089 /* armv7ve doesn't support any extensions. */
26090 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26091 {
26092 /* Keep backward compatibility for assemblers
26093 which don't support armv7ve. */
26094 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26095 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26096 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26097 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26098 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26099 }
26100 else
26101 {
26102 const char* pos = strchr (arm_active_target.arch_name, '+');
26103 if (pos)
26104 {
26105 char buf[32];
26106 gcc_assert (strlen (arm_active_target.arch_name)
26107 <= sizeof (buf) / sizeof (*pos));
26108 strncpy (buf, arm_active_target.arch_name,
26109 (pos - arm_active_target.arch_name) * sizeof (*pos));
26110 buf[pos - arm_active_target.arch_name] = '\0';
26111 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26112 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26113 }
26114 else
26115 asm_fprintf (asm_out_file, "\t.arch %s\n",
26116 arm_active_target.arch_name);
26117 }
26118 }
26119 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26120 asm_fprintf (asm_out_file, "\t.arch %s\n",
26121 arm_active_target.core_name + 8);
26122 else
26123 {
26124 const char* truncated_name
26125 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26126 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26127 }
26128
26129 if (print_tune_info)
26130 arm_print_tune_info ();
26131
26132 if (! TARGET_SOFT_FLOAT)
26133 {
26134 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26135 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26136
26137 if (TARGET_HARD_FLOAT_ABI)
26138 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26139 }
26140
26141 /* Some of these attributes only apply when the corresponding features
26142 are used. However we don't have any easy way of figuring this out.
26143 Conservatively record the setting that would have been used. */
26144
26145 if (flag_rounding_math)
26146 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26147
26148 if (!flag_unsafe_math_optimizations)
26149 {
26150 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26151 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26152 }
26153 if (flag_signaling_nans)
26154 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26155
26156 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26157 flag_finite_math_only ? 1 : 3);
26158
26159 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26160 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26161 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26162 flag_short_enums ? 1 : 2);
26163
26164 /* Tag_ABI_optimization_goals. */
26165 if (optimize_size)
26166 val = 4;
26167 else if (optimize >= 2)
26168 val = 2;
26169 else if (optimize)
26170 val = 1;
26171 else
26172 val = 6;
26173 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26174
26175 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26176 unaligned_access);
26177
26178 if (arm_fp16_format)
26179 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26180 (int) arm_fp16_format);
26181
26182 if (arm_lang_output_object_attributes_hook)
26183 arm_lang_output_object_attributes_hook();
26184 }
26185
26186 default_file_start ();
26187 }
26188
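      /* File-end hook: emit the .note.GNU-stack marker when required and, if
	 any call-via-register stubs were needed, output the per-register
	 "bx" stub labels into the text section.  */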
26189 static void
26190 arm_file_end (void)
26191 {
26192 int regno;
26193
26194 if (NEED_INDICATE_EXEC_STACK)
26195 /* Add .note.GNU-stack. */
26196 file_end_indicate_exec_stack ();
26197
26198 if (! thumb_call_reg_needed)
26199 return;
26200
26201 switch_to_section (text_section);
26202 asm_fprintf (asm_out_file, "\t.code 16\n");
26203 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26204
26205 for (regno = 0; regno < LR_REGNUM; regno++)
26206 {
26207 rtx label = thumb_call_via_label[regno];
26208
26209 if (label != 0)
26210 {
26211 targetm.asm_out.internal_label (asm_out_file, "L",
26212 CODE_LABEL_NUMBER (label));
26213 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26214 }
26215 }
26216 }
26217
26218 #ifndef ARM_PE
26219 /* Symbols in the text segment can be accessed without indirecting via the
26220 constant pool; it may take an extra binary operation, but this is still
26221 faster than indirecting via memory. Don't do this when not optimizing,
26222 since we won't be calculating all of the offsets necessary to do this
26223 simplification. */
26224
26225 static void
26226 arm_encode_section_info (tree decl, rtx rtl, int first)
26227 {
26228 if (optimize > 0 && TREE_CONSTANT (decl))
26229 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26230
26231 default_encode_section_info (decl, rtl, first);
26232 }
26233 #endif /* !ARM_PE */
26234
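      /* Output an internal label.  If this label is the target that the
	 conditional-execution state machine is waiting for, reset that state
	 first so conditionalisation does not continue past the label.  */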
26235 static void
26236 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26237 {
26238 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26239 && !strcmp (prefix, "L"))
26240 {
26241 arm_ccfsm_state = 0;
26242 arm_target_insn = NULL;
26243 }
26244 default_internal_label (stream, prefix, labelno);
26245 }
26246
26247 /* Output code to add DELTA to the first argument, and then jump
26248 to FUNCTION. Used for C++ multiple inheritance. */
26249
26250 static void
26251 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26252 HOST_WIDE_INT, tree function)
26253 {
26254 static int thunk_label = 0;
26255 char label[256];
26256 char labelpc[256];
26257 int mi_delta = delta;
26258 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26259 int shift = 0;
26260 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26261 ? 1 : 0);
26262 if (mi_delta < 0)
26263 mi_delta = - mi_delta;
26264
26265 final_start_function (emit_barrier (), file, 1);
26266
26267 if (TARGET_THUMB1)
26268 {
26269 int labelno = thunk_label++;
26270 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26271 /* Thunks are entered in arm mode when available. */
26272 if (TARGET_THUMB1_ONLY)
26273 {
26274 /* push r3 so we can use it as a temporary. */
26275 /* TODO: Omit this save if r3 is not used. */
26276 fputs ("\tpush {r3}\n", file);
26277 fputs ("\tldr\tr3, ", file);
26278 }
26279 else
26280 {
26281 fputs ("\tldr\tr12, ", file);
26282 }
26283 assemble_name (file, label);
26284 fputc ('\n', file);
26285 if (flag_pic)
26286 {
26287 /* If we are generating PIC, the ldr instruction below loads
26288 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26289 the address of the add + 8, so we have:
26290
26291 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26292 = target + 1.
26293
26294 Note that we have "+ 1" because some versions of GNU ld
26295 don't set the low bit of the result for R_ARM_REL32
26296 relocations against thumb function symbols.
26297 On ARMv6M this is +4, not +8. */
26298 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26299 assemble_name (file, labelpc);
26300 fputs (":\n", file);
26301 if (TARGET_THUMB1_ONLY)
26302 {
26303 /* This is 2 insns after the start of the thunk, so we know it
26304 is 4-byte aligned. */
26305 fputs ("\tadd\tr3, pc, r3\n", file);
26306 fputs ("\tmov r12, r3\n", file);
26307 }
26308 else
26309 fputs ("\tadd\tr12, pc, r12\n", file);
26310 }
26311 else if (TARGET_THUMB1_ONLY)
26312 fputs ("\tmov r12, r3\n", file);
26313 }
26314 if (TARGET_THUMB1_ONLY)
26315 {
26316 if (mi_delta > 255)
26317 {
26318 fputs ("\tldr\tr3, ", file);
26319 assemble_name (file, label);
26320 fputs ("+4\n", file);
26321 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26322 mi_op, this_regno, this_regno);
26323 }
26324 else if (mi_delta != 0)
26325 {
26326 /* Thumb1 unified syntax requires s suffix in instruction name when
26327 one of the operands is immediate. */
26328 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26329 mi_op, this_regno, this_regno,
26330 mi_delta);
26331 }
26332 }
26333 else
26334 {
26335 /* TODO: Use movw/movt for large constants when available. */
26336 while (mi_delta != 0)
26337 {
26338 if ((mi_delta & (3 << shift)) == 0)
26339 shift += 2;
26340 else
26341 {
26342 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26343 mi_op, this_regno, this_regno,
26344 mi_delta & (0xff << shift));
26345 mi_delta &= ~(0xff << shift);
26346 shift += 8;
26347 }
26348 }
26349 }
26350 if (TARGET_THUMB1)
26351 {
26352 if (TARGET_THUMB1_ONLY)
26353 fputs ("\tpop\t{r3}\n", file);
26354
26355 fprintf (file, "\tbx\tr12\n");
26356 ASM_OUTPUT_ALIGN (file, 2);
26357 assemble_name (file, label);
26358 fputs (":\n", file);
26359 if (flag_pic)
26360 {
26361 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26362 rtx tem = XEXP (DECL_RTL (function), 0);
26363 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26364 pipeline offset is four rather than eight. Adjust the offset
26365 accordingly. */
26366 tem = plus_constant (GET_MODE (tem), tem,
26367 TARGET_THUMB1_ONLY ? -3 : -7);
26368 tem = gen_rtx_MINUS (GET_MODE (tem),
26369 tem,
26370 gen_rtx_SYMBOL_REF (Pmode,
26371 ggc_strdup (labelpc)));
26372 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26373 }
26374 else
26375 /* Output ".word .LTHUNKn". */
26376 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26377
26378 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26379 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26380 }
26381 else
26382 {
26383 fputs ("\tb\t", file);
26384 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26385 if (NEED_PLT_RELOC)
26386 fputs ("(PLT)", file);
26387 fputc ('\n', file);
26388 }
26389
26390 final_end_function ();
26391 }
26392
26393 /* MI thunk handling for TARGET_32BIT. */
26394
26395 static void
26396 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26397 HOST_WIDE_INT vcall_offset, tree function)
26398 {
26399 /* On ARM, this_regno is R0 or R1 depending on
26400 whether the function returns an aggregate or not.
26401 */
26402 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26403 function)
26404 ? R1_REGNUM : R0_REGNUM);
26405
26406 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26407 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26408 reload_completed = 1;
26409 emit_note (NOTE_INSN_PROLOGUE_END);
26410
26411 /* Add DELTA to THIS_RTX. */
26412 if (delta != 0)
26413 arm_split_constant (PLUS, Pmode, NULL_RTX,
26414 delta, this_rtx, this_rtx, false);
26415
26416 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26417 if (vcall_offset != 0)
26418 {
26419 /* Load *THIS_RTX. */
26420 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26421 /* Compute *THIS_RTX + VCALL_OFFSET. */
26422 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26423 false);
26424 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26425 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26426 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26427 }
26428
26429 /* Generate a tail call to the target function. */
26430 if (!TREE_USED (function))
26431 {
26432 assemble_external (function);
26433 TREE_USED (function) = 1;
26434 }
26435 rtx funexp = XEXP (DECL_RTL (function), 0);
26436 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26437 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26438 SIBLING_CALL_P (insn) = 1;
26439
26440 insn = get_insns ();
26441 shorten_branches (insn);
26442 final_start_function (insn, file, 1);
26443 final (insn, file, 1);
26444 final_end_function ();
26445
26446 /* Stop pretending this is a post-reload pass. */
26447 reload_completed = 0;
26448 }
26449
26450 /* Output code to add DELTA to the first argument, and then jump
26451 to FUNCTION. Used for C++ multiple inheritance. */
26452
26453 static void
26454 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26455 HOST_WIDE_INT vcall_offset, tree function)
26456 {
26457 if (TARGET_32BIT)
26458 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26459 else
26460 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26461 }
26462
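      /* Output the CONST_VECTOR X to FILE as a single hexadecimal constant,
	 element by element; only the 64-bit vector modes V2SI, V4HI and V8QI
	 are handled here.  */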
26463 int
26464 arm_emit_vector_const (FILE *file, rtx x)
26465 {
26466 int i;
26467 const char * pattern;
26468
26469 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26470
26471 switch (GET_MODE (x))
26472 {
26473 case V2SImode: pattern = "%08x"; break;
26474 case V4HImode: pattern = "%04x"; break;
26475 case V8QImode: pattern = "%02x"; break;
26476 default: gcc_unreachable ();
26477 }
26478
26479 fprintf (file, "0x");
26480 for (i = CONST_VECTOR_NUNITS (x); i--;)
26481 {
26482 rtx element;
26483
26484 element = CONST_VECTOR_ELT (x, i);
26485 fprintf (file, pattern, INTVAL (element));
26486 }
26487
26488 return 1;
26489 }
26490
26491 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26492 HFmode constant pool entries are actually loaded with ldr. */
26493 void
26494 arm_emit_fp16_const (rtx c)
26495 {
26496 long bits;
26497
26498 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26499 if (WORDS_BIG_ENDIAN)
26500 assemble_zeros (2);
26501 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26502 if (!WORDS_BIG_ENDIAN)
26503 assemble_zeros (2);
26504 }
26505
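      /* Output assembly to load an iWMMXt GR register from memory.  When the
	 offset is in range a single wldrw suffices; otherwise the value is
	 bounced through a temporarily spilled core register and transferred
	 with tmcr.  */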
26506 const char *
26507 arm_output_load_gr (rtx *operands)
26508 {
26509 rtx reg;
26510 rtx offset;
26511 rtx wcgr;
26512 rtx sum;
26513
26514 if (!MEM_P (operands [1])
26515 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26516 || !REG_P (reg = XEXP (sum, 0))
26517 || !CONST_INT_P (offset = XEXP (sum, 1))
26518 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26519 return "wldrw%?\t%0, %1";
26520
26521 /* Fix up an out-of-range load of a GR register. */
26522 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26523 wcgr = operands[0];
26524 operands[0] = reg;
26525 output_asm_insn ("ldr%?\t%0, %1", operands);
26526
26527 operands[0] = wcgr;
26528 operands[1] = reg;
26529 output_asm_insn ("tmcr%?\t%0, %1", operands);
26530 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26531
26532 return "";
26533 }
26534
26535 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26536
26537 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26538 named arg and all anonymous args onto the stack.
26539 XXX I know the prologue shouldn't be pushing registers, but it is faster
26540 that way. */
26541
26542 static void
26543 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26544 machine_mode mode,
26545 tree type,
26546 int *pretend_size,
26547 int second_time ATTRIBUTE_UNUSED)
26548 {
26549 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26550 int nregs;
26551
26552 cfun->machine->uses_anonymous_args = 1;
26553 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26554 {
26555 nregs = pcum->aapcs_ncrn;
26556 if (nregs & 1)
26557 {
26558 int res = arm_needs_doubleword_align (mode, type);
26559 if (res < 0 && warn_psabi)
26560 inform (input_location, "parameter passing for argument of "
26561 "type %qT changed in GCC 7.1", type);
26562 else if (res > 0)
26563 nregs++;
26564 }
26565 }
26566 else
26567 nregs = pcum->nregs;
26568
26569 if (nregs < NUM_ARG_REGS)
26570 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26571 }
26572
26573 /* We can't rely on the caller doing the proper promotion when
26574 using APCS or ATPCS. */
26575
26576 static bool
26577 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26578 {
26579 return !TARGET_AAPCS_BASED;
26580 }
26581
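      /* Promotion of argument and return modes: sub-word integer modes are
	 promoted to SImode; all other modes are left unchanged.  */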
26582 static machine_mode
26583 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26584 machine_mode mode,
26585 int *punsignedp ATTRIBUTE_UNUSED,
26586 const_tree fntype ATTRIBUTE_UNUSED,
26587 int for_return ATTRIBUTE_UNUSED)
26588 {
26589 if (GET_MODE_CLASS (mode) == MODE_INT
26590 && GET_MODE_SIZE (mode) < 4)
26591 return SImode;
26592
26593 return mode;
26594 }
26595
26596
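      /* Whether enum types default to the smallest type that can hold all
	 their values; simply follows the target's ARM_DEFAULT_SHORT_ENUMS
	 setting.  */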
26597 static bool
26598 arm_default_short_enums (void)
26599 {
26600 return ARM_DEFAULT_SHORT_ENUMS;
26601 }
26602
26603
26604 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26605
26606 static bool
26607 arm_align_anon_bitfield (void)
26608 {
26609 return TARGET_AAPCS_BASED;
26610 }
26611
26612
26613 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26614
26615 static tree
26616 arm_cxx_guard_type (void)
26617 {
26618 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26619 }
26620
26621
26622 /* The EABI says test the least significant bit of a guard variable. */
26623
26624 static bool
26625 arm_cxx_guard_mask_bit (void)
26626 {
26627 return TARGET_AAPCS_BASED;
26628 }
26629
26630
26631 /* The EABI specifies that all array cookies are 8 bytes long. */
26632
26633 static tree
26634 arm_get_cookie_size (tree type)
26635 {
26636 tree size;
26637
26638 if (!TARGET_AAPCS_BASED)
26639 return default_cxx_get_cookie_size (type);
26640
26641 size = build_int_cst (sizetype, 8);
26642 return size;
26643 }
26644
26645
26646 /* The EABI says that array cookies should also contain the element size. */
26647
26648 static bool
26649 arm_cookie_has_size (void)
26650 {
26651 return TARGET_AAPCS_BASED;
26652 }
26653
26654
26655 /* The EABI says constructors and destructors should return a pointer to
26656 the object constructed/destroyed. */
26657
26658 static bool
26659 arm_cxx_cdtor_returns_this (void)
26660 {
26661 return TARGET_AAPCS_BASED;
26662 }
26663
26664 /* The EABI says that an inline function may never be the key
26665 method. */
26666
26667 static bool
26668 arm_cxx_key_method_may_be_inline (void)
26669 {
26670 return !TARGET_AAPCS_BASED;
26671 }
26672
26673 static void
26674 arm_cxx_determine_class_data_visibility (tree decl)
26675 {
26676 if (!TARGET_AAPCS_BASED
26677 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26678 return;
26679
26680 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26681 is exported. However, on systems without dynamic vague linkage,
26682 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26683 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26684 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26685 else
26686 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26687 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26688 }
26689
26690 static bool
26691 arm_cxx_class_data_always_comdat (void)
26692 {
26693 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26694 vague linkage if the class has no key function. */
26695 return !TARGET_AAPCS_BASED;
26696 }
26697
26698
26699 /* The EABI says __aeabi_atexit should be used to register static
26700 destructors. */
26701
26702 static bool
26703 arm_cxx_use_aeabi_atexit (void)
26704 {
26705 return TARGET_AAPCS_BASED;
26706 }
26707
26708
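      /* Set the function's return address to SOURCE.  If LR was not saved on
	 the stack the value goes straight into LR; otherwise it is stored
	 into LR's save slot, using SCRATCH to form the address when the
	 offset is too large for a single immediate.  */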
26709 void
26710 arm_set_return_address (rtx source, rtx scratch)
26711 {
26712 arm_stack_offsets *offsets;
26713 HOST_WIDE_INT delta;
26714 rtx addr;
26715 unsigned long saved_regs;
26716
26717 offsets = arm_get_frame_offsets ();
26718 saved_regs = offsets->saved_regs_mask;
26719
26720 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26721 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26722 else
26723 {
26724 if (frame_pointer_needed)
26725 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26726 else
26727 {
26728 /* LR will be the first saved register. */
26729 delta = offsets->outgoing_args - (offsets->frame + 4);
26730
26731
26732 if (delta >= 4096)
26733 {
26734 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26735 GEN_INT (delta & ~4095)));
26736 addr = scratch;
26737 delta &= 4095;
26738 }
26739 else
26740 addr = stack_pointer_rtx;
26741
26742 addr = plus_constant (Pmode, addr, delta);
26743 }
26744 /* The store needs to be marked as frame related in order to prevent
26745 DSE from deleting it as dead if it is based on fp. */
26746 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26747 RTX_FRAME_RELATED_P (insn) = 1;
26748 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26749 }
26750 }
26751
26752
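      /* Thumb counterpart of arm_set_return_address: compute the address of
	 LR's save slot relative to the frame pointer or stack pointer
	 (allowing for the Thumb-1 backtrace structure) and store SOURCE
	 there, or move SOURCE directly into LR if it was not saved.  */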
26753 void
26754 thumb_set_return_address (rtx source, rtx scratch)
26755 {
26756 arm_stack_offsets *offsets;
26757 HOST_WIDE_INT delta;
26758 HOST_WIDE_INT limit;
26759 int reg;
26760 rtx addr;
26761 unsigned long mask;
26762
26763 emit_use (source);
26764
26765 offsets = arm_get_frame_offsets ();
26766 mask = offsets->saved_regs_mask;
26767 if (mask & (1 << LR_REGNUM))
26768 {
26769 limit = 1024;
26770 /* Find the saved regs. */
26771 if (frame_pointer_needed)
26772 {
26773 delta = offsets->soft_frame - offsets->saved_args;
26774 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26775 if (TARGET_THUMB1)
26776 limit = 128;
26777 }
26778 else
26779 {
26780 delta = offsets->outgoing_args - offsets->saved_args;
26781 reg = SP_REGNUM;
26782 }
26783 /* Allow for the stack frame. */
26784 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26785 delta -= 16;
26786 /* The link register is always the first saved register. */
26787 delta -= 4;
26788
26789 /* Construct the address. */
26790 addr = gen_rtx_REG (SImode, reg);
26791 if (delta > limit)
26792 {
26793 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26794 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26795 addr = scratch;
26796 }
26797 else
26798 addr = plus_constant (Pmode, addr, delta);
26799
26800 /* The store needs to be marked as frame related in order to prevent
26801 DSE from deleting it as dead if it is based on fp. */
26802 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26803 RTX_FRAME_RELATED_P (insn) = 1;
26804 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26805 }
26806 else
26807 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26808 }
26809
26810 /* Implements target hook vector_mode_supported_p. */
26811 bool
26812 arm_vector_mode_supported_p (machine_mode mode)
26813 {
26814 /* Neon also supports V2SImode, etc. listed in the clause below. */
26815 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26816 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26817 || mode == V2DImode || mode == V8HFmode))
26818 return true;
26819
26820 if ((TARGET_NEON || TARGET_IWMMXT)
26821 && ((mode == V2SImode)
26822 || (mode == V4HImode)
26823 || (mode == V8QImode)))
26824 return true;
26825
26826 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26827 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26828 || mode == V2HAmode))
26829 return true;
26830
26831 return false;
26832 }
26833
26834 /* Implements target hook array_mode_supported_p. */
26835
26836 static bool
26837 arm_array_mode_supported_p (machine_mode mode,
26838 unsigned HOST_WIDE_INT nelems)
26839 {
26840 if (TARGET_NEON
26841 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26842 && (nelems >= 2 && nelems <= 4))
26843 return true;
26844
26845 return false;
26846 }
26847
26848 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26849 registers when autovectorizing for Neon, at least until multiple vector
26850 widths are supported properly by the middle-end. */
26851
26852 static machine_mode
26853 arm_preferred_simd_mode (machine_mode mode)
26854 {
26855 if (TARGET_NEON)
26856 switch (mode)
26857 {
26858 case SFmode:
26859 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26860 case SImode:
26861 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26862 case HImode:
26863 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26864 case QImode:
26865 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26866 case DImode:
26867 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26868 return V2DImode;
26869 break;
26870
26871 default:;
26872 }
26873
26874 if (TARGET_REALLY_IWMMXT)
26875 switch (mode)
26876 {
26877 case SImode:
26878 return V2SImode;
26879 case HImode:
26880 return V4HImode;
26881 case QImode:
26882 return V8QImode;
26883
26884 default:;
26885 }
26886
26887 return word_mode;
26888 }
26889
26890 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26891
26892 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26893 using r0-r4 for function arguments and r7 for the stack frame, leaving
26894 too few registers for doubleword arithmetic. For Thumb-2 all the
26895 potentially problematic instructions accept high registers so this is not
26896 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26897 that require many low registers. */
26898 static bool
26899 arm_class_likely_spilled_p (reg_class_t rclass)
26900 {
26901 if ((TARGET_THUMB1 && rclass == LO_REGS)
26902 || rclass == CC_REG)
26903 return true;
26904
26905 return false;
26906 }
26907
26908 /* Implements target hook small_register_classes_for_mode_p. */
26909 bool
26910 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26911 {
26912 return TARGET_THUMB1;
26913 }
26914
26915 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26916 ARM insns and therefore guarantee that the shift count is modulo 256.
26917 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26918 guarantee no particular behavior for out-of-range counts. */
26919
26920 static unsigned HOST_WIDE_INT
26921 arm_shift_truncation_mask (machine_mode mode)
26922 {
26923 return mode == SImode ? 255 : 0;
26924 }
26925
26926
26927 /* Map internal gcc register numbers to DWARF2 register numbers. */
26928
26929 unsigned int
26930 arm_dbx_register_number (unsigned int regno)
26931 {
26932 if (regno < 16)
26933 return regno;
26934
26935 if (IS_VFP_REGNUM (regno))
26936 {
26937 /* See comment in arm_dwarf_register_span. */
26938 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26939 return 64 + regno - FIRST_VFP_REGNUM;
26940 else
26941 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26942 }
26943
26944 if (IS_IWMMXT_GR_REGNUM (regno))
26945 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26946
26947 if (IS_IWMMXT_REGNUM (regno))
26948 return 112 + regno - FIRST_IWMMXT_REGNUM;
26949
26950 return DWARF_FRAME_REGISTERS;
26951 }
26952
26953 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26954 GCC models them as 64 32-bit registers, so we need to describe this to
26955 the DWARF generation code. Other registers can use the default. */
26956 static rtx
26957 arm_dwarf_register_span (rtx rtl)
26958 {
26959 machine_mode mode;
26960 unsigned regno;
26961 rtx parts[16];
26962 int nregs;
26963 int i;
26964
26965 regno = REGNO (rtl);
26966 if (!IS_VFP_REGNUM (regno))
26967 return NULL_RTX;
26968
26969 /* XXX FIXME: The EABI defines two VFP register ranges:
26970 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26971 256-287: D0-D31
26972 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26973 corresponding D register. Until GDB supports this, we shall use the
26974 legacy encodings. We also use these encodings for D0-D15 for
26975 compatibility with older debuggers. */
26976 mode = GET_MODE (rtl);
26977 if (GET_MODE_SIZE (mode) < 8)
26978 return NULL_RTX;
26979
26980 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26981 {
26982 nregs = GET_MODE_SIZE (mode) / 4;
26983 for (i = 0; i < nregs; i += 2)
26984 if (TARGET_BIG_END)
26985 {
26986 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26987 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26988 }
26989 else
26990 {
26991 parts[i] = gen_rtx_REG (SImode, regno + i);
26992 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26993 }
26994 }
26995 else
26996 {
26997 nregs = GET_MODE_SIZE (mode) / 8;
26998 for (i = 0; i < nregs; i++)
26999 parts[i] = gen_rtx_REG (DImode, regno + i);
27000 }
27001
27002 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27003 }
27004
27005 #if ARM_UNWIND_INFO
27006 /* Emit unwind directives for a store-multiple instruction or stack pointer
27007 push during alignment.
27008 These should only ever be generated by the function prologue code, so
27009 expect them to have a particular form.
27010 The store-multiple instruction sometimes pushes pc as the last register,
27011 although it should not be tracked in the unwind information; for -Os it
27012 sometimes also pushes dummy registers before the first register that needs
27013 to be tracked in the unwind information. Such dummy registers are there just
27014 to avoid a separate stack adjustment, and will not be restored in the
27015 epilogue. */
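/* For example (illustrative only), a prologue "push {r4, r5, lr}" is
   annotated as ".save {r4, r5, lr}", a "vpush {d8, d9}" as
   ".vsave {d8, d9}", and a pushed pc or any -Os padding registers are
   described with an additional ".pad" directive instead.  */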
27016
27017 static void
27018 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27019 {
27020 int i;
27021 HOST_WIDE_INT offset;
27022 HOST_WIDE_INT nregs;
27023 int reg_size;
27024 unsigned reg;
27025 unsigned lastreg;
27026 unsigned padfirst = 0, padlast = 0;
27027 rtx e;
27028
27029 e = XVECEXP (p, 0, 0);
27030 gcc_assert (GET_CODE (e) == SET);
27031
27032 /* First insn will adjust the stack pointer. */
27033 gcc_assert (GET_CODE (e) == SET
27034 && REG_P (SET_DEST (e))
27035 && REGNO (SET_DEST (e)) == SP_REGNUM
27036 && GET_CODE (SET_SRC (e)) == PLUS);
27037
27038 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27039 nregs = XVECLEN (p, 0) - 1;
27040 gcc_assert (nregs);
27041
27042 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27043 if (reg < 16)
27044 {
27045 /* For -Os dummy registers can be pushed at the beginning to
27046 avoid separate stack pointer adjustment. */
27047 e = XVECEXP (p, 0, 1);
27048 e = XEXP (SET_DEST (e), 0);
27049 if (GET_CODE (e) == PLUS)
27050 padfirst = INTVAL (XEXP (e, 1));
27051 gcc_assert (padfirst == 0 || optimize_size);
27052 /* The function prologue may also push pc, but not annotate it as it is
27053 never restored. We turn this into a stack pointer adjustment. */
27054 e = XVECEXP (p, 0, nregs);
27055 e = XEXP (SET_DEST (e), 0);
27056 if (GET_CODE (e) == PLUS)
27057 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27058 else
27059 padlast = offset - 4;
27060 gcc_assert (padlast == 0 || padlast == 4);
27061 if (padlast == 4)
27062 fprintf (asm_out_file, "\t.pad #4\n");
27063 reg_size = 4;
27064 fprintf (asm_out_file, "\t.save {");
27065 }
27066 else if (IS_VFP_REGNUM (reg))
27067 {
27068 reg_size = 8;
27069 fprintf (asm_out_file, "\t.vsave {");
27070 }
27071 else
27072 /* Unknown register type. */
27073 gcc_unreachable ();
27074
27075 /* If the stack increment doesn't match the size of the saved registers,
27076 something has gone horribly wrong. */
27077 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27078
27079 offset = padfirst;
27080 lastreg = 0;
27081 /* The remaining insns will describe the stores. */
27082 for (i = 1; i <= nregs; i++)
27083 {
27084 /* Expect (set (mem <addr>) (reg)).
27085 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27086 e = XVECEXP (p, 0, i);
27087 gcc_assert (GET_CODE (e) == SET
27088 && MEM_P (SET_DEST (e))
27089 && REG_P (SET_SRC (e)));
27090
27091 reg = REGNO (SET_SRC (e));
27092 gcc_assert (reg >= lastreg);
27093
27094 if (i != 1)
27095 fprintf (asm_out_file, ", ");
27096 /* We can't use %r for vfp because we need to use the
27097 double precision register names. */
27098 if (IS_VFP_REGNUM (reg))
27099 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27100 else
27101 asm_fprintf (asm_out_file, "%r", reg);
27102
27103 if (flag_checking)
27104 {
27105 /* Check that the addresses are consecutive. */
27106 e = XEXP (SET_DEST (e), 0);
27107 if (GET_CODE (e) == PLUS)
27108 gcc_assert (REG_P (XEXP (e, 0))
27109 && REGNO (XEXP (e, 0)) == SP_REGNUM
27110 && CONST_INT_P (XEXP (e, 1))
27111 && offset == INTVAL (XEXP (e, 1)));
27112 else
27113 gcc_assert (i == 1
27114 && REG_P (e)
27115 && REGNO (e) == SP_REGNUM);
27116 offset += reg_size;
27117 }
27118 }
27119 fprintf (asm_out_file, "}\n");
27120 if (padfirst)
27121 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27122 }
27123
27124 /* Emit unwind directives for a SET. */
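/* Depending on the form of the SET, this emits ".save {rN}" or
   ".save {dN}" for a single-register push, ".pad #N" for a stack
   pointer adjustment, ".setfp" when the frame pointer is established,
   or ".movsp" when another register is set from sp.  */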
27125
27126 static void
27127 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27128 {
27129 rtx e0;
27130 rtx e1;
27131 unsigned reg;
27132
27133 e0 = XEXP (p, 0);
27134 e1 = XEXP (p, 1);
27135 switch (GET_CODE (e0))
27136 {
27137 case MEM:
27138 /* Pushing a single register. */
27139 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27140 || !REG_P (XEXP (XEXP (e0, 0), 0))
27141 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27142 abort ();
27143
27144 asm_fprintf (asm_out_file, "\t.save ");
27145 if (IS_VFP_REGNUM (REGNO (e1)))
27146 asm_fprintf(asm_out_file, "{d%d}\n",
27147 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27148 else
27149 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27150 break;
27151
27152 case REG:
27153 if (REGNO (e0) == SP_REGNUM)
27154 {
27155 /* A stack increment. */
27156 if (GET_CODE (e1) != PLUS
27157 || !REG_P (XEXP (e1, 0))
27158 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27159 || !CONST_INT_P (XEXP (e1, 1)))
27160 abort ();
27161
27162 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27163 -INTVAL (XEXP (e1, 1)));
27164 }
27165 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27166 {
27167 HOST_WIDE_INT offset;
27168
27169 if (GET_CODE (e1) == PLUS)
27170 {
27171 if (!REG_P (XEXP (e1, 0))
27172 || !CONST_INT_P (XEXP (e1, 1)))
27173 abort ();
27174 reg = REGNO (XEXP (e1, 0));
27175 offset = INTVAL (XEXP (e1, 1));
27176 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27177 HARD_FRAME_POINTER_REGNUM, reg,
27178 offset);
27179 }
27180 else if (REG_P (e1))
27181 {
27182 reg = REGNO (e1);
27183 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27184 HARD_FRAME_POINTER_REGNUM, reg);
27185 }
27186 else
27187 abort ();
27188 }
27189 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27190 {
27191 /* Move from sp to reg. */
27192 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27193 }
27194 else if (GET_CODE (e1) == PLUS
27195 && REG_P (XEXP (e1, 0))
27196 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27197 && CONST_INT_P (XEXP (e1, 1)))
27198 {
27199 /* Set reg to offset from sp. */
27200 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27201 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27202 }
27203 else
27204 abort ();
27205 break;
27206
27207 default:
27208 abort ();
27209 }
27210 }
27211
27212
27213 /* Emit unwind directives for the given insn. */
27214
27215 static void
27216 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27217 {
27218 rtx note, pat;
27219 bool handled_one = false;
27220
27221 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27222 return;
27223
27224 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27225 && (TREE_NOTHROW (current_function_decl)
27226 || crtl->all_throwers_are_sibcalls))
27227 return;
27228
27229 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27230 return;
27231
27232 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27233 {
27234 switch (REG_NOTE_KIND (note))
27235 {
27236 case REG_FRAME_RELATED_EXPR:
27237 pat = XEXP (note, 0);
27238 goto found;
27239
27240 case REG_CFA_REGISTER:
27241 pat = XEXP (note, 0);
27242 if (pat == NULL)
27243 {
27244 pat = PATTERN (insn);
27245 if (GET_CODE (pat) == PARALLEL)
27246 pat = XVECEXP (pat, 0, 0);
27247 }
27248
27249 /* Only emitted for IS_STACKALIGN re-alignment. */
27250 {
27251 rtx dest, src;
27252 unsigned reg;
27253
27254 src = SET_SRC (pat);
27255 dest = SET_DEST (pat);
27256
27257 gcc_assert (src == stack_pointer_rtx);
27258 reg = REGNO (dest);
27259 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27260 reg + 0x90, reg);
27261 }
27262 handled_one = true;
27263 break;
27264
27265 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27266 to get correct dwarf information for shrink-wrapping. We should not
27267 emit unwind information for it because these notes are used either for
27268 pretend arguments or to adjust sp and restore registers from the
27269 stack. */
27270 case REG_CFA_DEF_CFA:
27271 case REG_CFA_ADJUST_CFA:
27272 case REG_CFA_RESTORE:
27273 return;
27274
27275 case REG_CFA_EXPRESSION:
27276 case REG_CFA_OFFSET:
27277 /* ??? Only handling here what we actually emit. */
27278 gcc_unreachable ();
27279
27280 default:
27281 break;
27282 }
27283 }
27284 if (handled_one)
27285 return;
27286 pat = PATTERN (insn);
27287 found:
27288
27289 switch (GET_CODE (pat))
27290 {
27291 case SET:
27292 arm_unwind_emit_set (asm_out_file, pat);
27293 break;
27294
27295 case SEQUENCE:
27296 /* Store multiple. */
27297 arm_unwind_emit_sequence (asm_out_file, pat);
27298 break;
27299
27300 default:
27301 abort();
27302 }
27303 }
27304
27305
27306 /* Output a reference from a function exception table to the type_info
27307 object X. The EABI specifies that the symbol should be relocated by
27308 an R_ARM_TARGET2 relocation. */
27309
27310 static bool
27311 arm_output_ttype (rtx x)
27312 {
27313 fputs ("\t.word\t", asm_out_file);
27314 output_addr_const (asm_out_file, x);
27315 /* Use special relocations for symbol references. */
27316 if (!CONST_INT_P (x))
27317 fputs ("(TARGET2)", asm_out_file);
27318 fputc ('\n', asm_out_file);
27319
27320 return TRUE;
27321 }
27322
27323 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27324
27325 static void
27326 arm_asm_emit_except_personality (rtx personality)
27327 {
27328 fputs ("\t.personality\t", asm_out_file);
27329 output_addr_const (asm_out_file, personality);
27330 fputc ('\n', asm_out_file);
27331 }
27332 #endif /* ARM_UNWIND_INFO */
27333
27334 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27335
27336 static void
27337 arm_asm_init_sections (void)
27338 {
27339 #if ARM_UNWIND_INFO
27340 exception_section = get_unnamed_section (0, output_section_asm_op,
27341 "\t.handlerdata");
27342 #endif /* ARM_UNWIND_INFO */
27343
27344 #ifdef OBJECT_FORMAT_ELF
27345 if (target_pure_code)
27346 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27347 #endif
27348 }
27349
27350 /* Output unwind directives for the start/end of a function. */
27351
27352 void
27353 arm_output_fn_unwind (FILE * f, bool prologue)
27354 {
27355 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27356 return;
27357
27358 if (prologue)
27359 fputs ("\t.fnstart\n", f);
27360 else
27361 {
27362 /* If this function will never be unwound, then mark it as such.
27363 The same condition is used in arm_unwind_emit to suppress
27364 the frame annotations. */
27365 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27366 && (TREE_NOTHROW (current_function_decl)
27367 || crtl->all_throwers_are_sibcalls))
27368 fputs("\t.cantunwind\n", f);
27369
27370 fputs ("\t.fnend\n", f);
27371 }
27372 }
27373
27374 static bool
27375 arm_emit_tls_decoration (FILE *fp, rtx x)
27376 {
27377 enum tls_reloc reloc;
27378 rtx val;
27379
27380 val = XVECEXP (x, 0, 0);
27381 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27382
27383 output_addr_const (fp, val);
27384
27385 switch (reloc)
27386 {
27387 case TLS_GD32:
27388 fputs ("(tlsgd)", fp);
27389 break;
27390 case TLS_LDM32:
27391 fputs ("(tlsldm)", fp);
27392 break;
27393 case TLS_LDO32:
27394 fputs ("(tlsldo)", fp);
27395 break;
27396 case TLS_IE32:
27397 fputs ("(gottpoff)", fp);
27398 break;
27399 case TLS_LE32:
27400 fputs ("(tpoff)", fp);
27401 break;
27402 case TLS_DESCSEQ:
27403 fputs ("(tlsdesc)", fp);
27404 break;
27405 default:
27406 gcc_unreachable ();
27407 }
27408
27409 switch (reloc)
27410 {
27411 case TLS_GD32:
27412 case TLS_LDM32:
27413 case TLS_IE32:
27414 case TLS_DESCSEQ:
27415 fputs (" + (. - ", fp);
27416 output_addr_const (fp, XVECEXP (x, 0, 2));
27417 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27418 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27419 output_addr_const (fp, XVECEXP (x, 0, 3));
27420 fputc (')', fp);
27421 break;
27422 default:
27423 break;
27424 }
27425
27426 return TRUE;
27427 }
27428
27429 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27430
27431 static void
27432 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27433 {
27434 gcc_assert (size == 4);
27435 fputs ("\t.word\t", file);
27436 output_addr_const (file, x);
27437 fputs ("(tlsldo)", file);
27438 }
27439
27440 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27441
27442 static bool
27443 arm_output_addr_const_extra (FILE *fp, rtx x)
27444 {
27445 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27446 return arm_emit_tls_decoration (fp, x);
27447 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27448 {
27449 char label[256];
27450 int labelno = INTVAL (XVECEXP (x, 0, 0));
27451
27452 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27453 assemble_name_raw (fp, label);
27454
27455 return TRUE;
27456 }
27457 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27458 {
27459 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27460 if (GOT_PCREL)
27461 fputs ("+.", fp);
27462 fputs ("-(", fp);
27463 output_addr_const (fp, XVECEXP (x, 0, 0));
27464 fputc (')', fp);
27465 return TRUE;
27466 }
27467 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27468 {
27469 output_addr_const (fp, XVECEXP (x, 0, 0));
27470 if (GOT_PCREL)
27471 fputs ("+.", fp);
27472 fputs ("-(", fp);
27473 output_addr_const (fp, XVECEXP (x, 0, 1));
27474 fputc (')', fp);
27475 return TRUE;
27476 }
27477 else if (GET_CODE (x) == CONST_VECTOR)
27478 return arm_emit_vector_const (fp, x);
27479
27480 return FALSE;
27481 }
27482
27483 /* Output assembly for a shift instruction.
27484 SET_FLAGS determines how the instruction modifies the condition codes.
27485 0 - Do not set condition codes.
27486 1 - Set condition codes.
27487 2 - Use smallest instruction. */
27488 const char *
27489 arm_output_shift(rtx * operands, int set_flags)
27490 {
27491 char pattern[100];
27492 static const char flag_chars[3] = {'?', '.', '!'};
27493 const char *shift;
27494 HOST_WIDE_INT val;
27495 char c;
27496
27497 c = flag_chars[set_flags];
27498 shift = shift_op(operands[3], &val);
27499 if (shift)
27500 {
27501 if (val != -1)
27502 operands[2] = GEN_INT(val);
27503 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27504 }
27505 else
27506 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27507
27508 output_asm_insn (pattern, operands);
27509 return "";
27510 }
27511
27512 /* Output assembly for a WMMX immediate shift instruction. */
27513 const char *
27514 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27515 {
27516 int shift = INTVAL (operands[2]);
27517 char templ[50];
27518 machine_mode opmode = GET_MODE (operands[0]);
27519
27520 gcc_assert (shift >= 0);
27521
27522 /* Handle out-of-range shift values: > 63 (for the D qualifier),
27523 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27524 if (((opmode == V4HImode) && (shift > 15))
27525 || ((opmode == V2SImode) && (shift > 31))
27526 || ((opmode == DImode) && (shift > 63)))
27527 {
27528 if (wror_or_wsra)
27529 {
27530 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27531 output_asm_insn (templ, operands);
27532 if (opmode == DImode)
27533 {
27534 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27535 output_asm_insn (templ, operands);
27536 }
27537 }
27538 else
27539 {
27540 /* The destination register will contain all zeros. */
27541 sprintf (templ, "wzero\t%%0");
27542 output_asm_insn (templ, operands);
27543 }
27544 return "";
27545 }
27546
27547 if ((opmode == DImode) && (shift > 32))
27548 {
27549 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27550 output_asm_insn (templ, operands);
27551 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27552 output_asm_insn (templ, operands);
27553 }
27554 else
27555 {
27556 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27557 output_asm_insn (templ, operands);
27558 }
27559 return "";
27560 }
27561
27562 /* Output assembly for a WMMX tinsr instruction. */
27563 const char *
27564 arm_output_iwmmxt_tinsr (rtx *operands)
27565 {
27566 int mask = INTVAL (operands[3]);
27567 int i;
27568 char templ[50];
27569 int units = mode_nunits[GET_MODE (operands[0])];
27570 gcc_assert ((mask & (mask - 1)) == 0);
27571 for (i = 0; i < units; ++i)
27572 {
27573 if ((mask & 0x01) == 1)
27574 {
27575 break;
27576 }
27577 mask >>= 1;
27578 }
27579 gcc_assert (i < units);
27580 {
27581 switch (GET_MODE (operands[0]))
27582 {
27583 case V8QImode:
27584 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27585 break;
27586 case V4HImode:
27587 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27588 break;
27589 case V2SImode:
27590 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27591 break;
27592 default:
27593 gcc_unreachable ();
27594 break;
27595 }
27596 output_asm_insn (templ, operands);
27597 }
27598 return "";
27599 }
27600
27601 /* Output a Thumb-1 casesi dispatch sequence. */
27602 const char *
27603 thumb1_output_casesi (rtx *operands)
27604 {
27605 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27606
27607 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27608
27609 switch (GET_MODE(diff_vec))
27610 {
27611 case QImode:
27612 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27613 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27614 case HImode:
27615 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27616 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27617 case SImode:
27618 return "bl\t%___gnu_thumb1_case_si";
27619 default:
27620 gcc_unreachable ();
27621 }
27622 }
27623
27624 /* Output a Thumb-2 casesi instruction. */
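/* The emitted sequence is, schematically:

       cmp   index, limit
       bhi   default_label
       tbb   [pc, index]            @ QImode table (tbh for HImode)

   For SImode tables the dispatch uses an adr/ldr pair, plus an
   add/bx when compiling PIC.  */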
27625 const char *
27626 thumb2_output_casesi (rtx *operands)
27627 {
27628 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27629
27630 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27631
27632 output_asm_insn ("cmp\t%0, %1", operands);
27633 output_asm_insn ("bhi\t%l3", operands);
27634 switch (GET_MODE(diff_vec))
27635 {
27636 case QImode:
27637 return "tbb\t[%|pc, %0]";
27638 case HImode:
27639 return "tbh\t[%|pc, %0, lsl #1]";
27640 case SImode:
27641 if (flag_pic)
27642 {
27643 output_asm_insn ("adr\t%4, %l2", operands);
27644 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27645 output_asm_insn ("add\t%4, %4, %5", operands);
27646 return "bx\t%4";
27647 }
27648 else
27649 {
27650 output_asm_insn ("adr\t%4, %l2", operands);
27651 return "ldr\t%|pc, [%4, %0, lsl #2]";
27652 }
27653 default:
27654 gcc_unreachable ();
27655 }
27656 }
27657
27658 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27659 per-core tuning structs. */
27660 static int
27661 arm_issue_rate (void)
27662 {
27663 return current_tune->issue_rate;
27664 }
27665
27666 /* Return how many instructions the scheduler should look ahead to choose
27667 the best one. */
27668 static int
27669 arm_first_cycle_multipass_dfa_lookahead (void)
27670 {
27671 int issue_rate = arm_issue_rate ();
27672
27673 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27674 }
27675
27676 /* Enable modeling of L2 auto-prefetcher. */
27677 static int
27678 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27679 {
27680 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27681 }
27682
27683 const char *
27684 arm_mangle_type (const_tree type)
27685 {
27686 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27687 has to be mangled as if it is in the "std" namespace. */
27688 if (TARGET_AAPCS_BASED
27689 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27690 return "St9__va_list";
27691
27692 /* Half-precision float. */
27693 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27694 return "Dh";
27695
27696 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27697 builtin type. */
27698 if (TYPE_NAME (type) != NULL)
27699 return arm_mangle_builtin_type (type);
27700
27701 /* Use the default mangling. */
27702 return NULL;
27703 }
27704
27705 /* Order of allocation of core registers for Thumb: this allocation is
27706 written over the corresponding initial entries of the array
27707 initialized with REG_ALLOC_ORDER. We allocate all low registers
27708 first. Saving and restoring a low register is usually cheaper than
27709 using a call-clobbered high register. */
27710
27711 static const int thumb_core_reg_alloc_order[] =
27712 {
27713 3, 2, 1, 0, 4, 5, 6, 7,
27714 12, 14, 8, 9, 10, 11
27715 };
27716
27717 /* Adjust register allocation order when compiling for Thumb. */
27718
27719 void
27720 arm_order_regs_for_local_alloc (void)
27721 {
27722 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27723 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27724 if (TARGET_THUMB)
27725 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27726 sizeof (thumb_core_reg_alloc_order));
27727 }
27728
27729 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27730
27731 bool
27732 arm_frame_pointer_required (void)
27733 {
27734 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27735 return true;
27736
27737 /* If the function receives nonlocal gotos, it needs to save the frame
27738 pointer in the nonlocal_goto_save_area object. */
27739 if (cfun->has_nonlocal_label)
27740 return true;
27741
27742 /* The frame pointer is required for non-leaf APCS frames. */
27743 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27744 return true;
27745
27746 /* If we are probing the stack in the prologue, we will have a faulting
27747 instruction prior to the stack adjustment and this requires a frame
27748 pointer if we want to catch the exception using the EABI unwinder. */
27749 if (!IS_INTERRUPT (arm_current_func_type ())
27750 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27751 && arm_except_unwind_info (&global_options) == UI_TARGET
27752 && cfun->can_throw_non_call_exceptions)
27753 {
27754 HOST_WIDE_INT size = get_frame_size ();
27755
27756 /* That's irrelevant if there is no stack adjustment. */
27757 if (size <= 0)
27758 return false;
27759
27760 /* That's relevant only if there is a stack probe. */
27761 if (crtl->is_leaf && !cfun->calls_alloca)
27762 {
27763 /* We don't have the final size of the frame so adjust. */
27764 size += 32 * UNITS_PER_WORD;
27765 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27766 return true;
27767 }
27768 else
27769 return true;
27770 }
27771
27772 return false;
27773 }
27774
27775 /* Only Thumb-1 lacks support for conditional execution, so return true if
27776 the target is not Thumb-1. */
27777 static bool
27778 arm_have_conditional_execution (void)
27779 {
27780 return !TARGET_THUMB1;
27781 }
27782
27783 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27784 static HOST_WIDE_INT
27785 arm_vector_alignment (const_tree type)
27786 {
27787 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27788
27789 if (TARGET_AAPCS_BASED)
27790 align = MIN (align, 64);
27791
27792 return align;
27793 }
27794
27795 static unsigned int
27796 arm_autovectorize_vector_sizes (void)
27797 {
27798 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27799 }
27800
27801 static bool
27802 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27803 {
27804 /* Vectors which aren't in packed structures will not be less aligned than
27805 the natural alignment of their element type, so this is safe. */
27806 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27807 return !is_packed;
27808
27809 return default_builtin_vector_alignment_reachable (type, is_packed);
27810 }
27811
27812 static bool
27813 arm_builtin_support_vector_misalignment (machine_mode mode,
27814 const_tree type, int misalignment,
27815 bool is_packed)
27816 {
27817 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27818 {
27819 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27820
27821 if (is_packed)
27822 return align == 1;
27823
27824 /* If the misalignment is unknown, we should be able to handle the access
27825 so long as it is not to a member of a packed data structure. */
27826 if (misalignment == -1)
27827 return true;
27828
27829 /* Return true if the misalignment is a multiple of the natural alignment
27830 of the vector's element type. This is probably always going to be
27831 true in practice, since we've already established that this isn't a
27832 packed access. */
27833 return ((misalignment % align) == 0);
27834 }
27835
27836 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27837 is_packed);
27838 }
27839
27840 static void
27841 arm_conditional_register_usage (void)
27842 {
27843 int regno;
27844
27845 if (TARGET_THUMB1 && optimize_size)
27846 {
27847 /* When optimizing for size on Thumb-1, it's better not
27848 to use the HI regs, because of the overhead of
27849 stacking them. */
27850 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27851 fixed_regs[regno] = call_used_regs[regno] = 1;
27852 }
27853
27854 /* The link register can be clobbered by any branch insn,
27855 but we have no way to track that at present, so mark
27856 it as unavailable. */
27857 if (TARGET_THUMB1)
27858 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27859
27860 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27861 {
27862 /* VFPv3 registers are disabled when earlier VFP
27863 versions are selected due to the definition of
27864 LAST_VFP_REGNUM. */
27865 for (regno = FIRST_VFP_REGNUM;
27866 regno <= LAST_VFP_REGNUM; ++ regno)
27867 {
27868 fixed_regs[regno] = 0;
27869 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27870 || regno >= FIRST_VFP_REGNUM + 32;
27871 }
27872 }
27873
27874 if (TARGET_REALLY_IWMMXT)
27875 {
27876 regno = FIRST_IWMMXT_GR_REGNUM;
27877 /* The 2002/10/09 revision of the XScale ABI has wCG0
27878 and wCG1 as call-preserved registers. The 2002/11/21
27879 revision changed this so that all wCG registers are
27880 scratch registers. */
27881 for (regno = FIRST_IWMMXT_GR_REGNUM;
27882 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27883 fixed_regs[regno] = 0;
27884 /* The XScale ABI has wR0 - wR9 as scratch registers,
27885 the rest as call-preserved registers. */
27886 for (regno = FIRST_IWMMXT_REGNUM;
27887 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27888 {
27889 fixed_regs[regno] = 0;
27890 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27891 }
27892 }
27893
27894 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27895 {
27896 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27897 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27898 }
27899 else if (TARGET_APCS_STACK)
27900 {
27901 fixed_regs[10] = 1;
27902 call_used_regs[10] = 1;
27903 }
27904 /* -mcaller-super-interworking reserves r11 for calls to
27905 _interwork_r11_call_via_rN(). Making the register global
27906 is an easy way of ensuring that it remains valid for all
27907 calls. */
27908 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27909 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27910 {
27911 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27912 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27913 if (TARGET_CALLER_INTERWORKING)
27914 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27915 }
27916 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27917 }
27918
27919 static reg_class_t
27920 arm_preferred_rename_class (reg_class_t rclass)
27921 {
27922 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27923 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
27924 so code size can be reduced. */
27925 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27926 return LO_REGS;
27927 else
27928 return NO_REGS;
27929 }
27930
27931 /* Compute the attribute "length" of insn "*push_multi".
27932 This function MUST therefore be kept in sync with that insn pattern. */
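/* For example, in Thumb-2 "push {r0-r7, lr}" can use a 16-bit encoding
   and so has length 2, while "push {r0, r8}" needs a 32-bit encoding
   and has length 4.  */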
27933 int
27934 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27935 {
27936 int i, regno, hi_reg;
27937 int num_saves = XVECLEN (parallel_op, 0);
27938
27939 /* ARM mode. */
27940 if (TARGET_ARM)
27941 return 4;
27942 /* Thumb1 mode. */
27943 if (TARGET_THUMB1)
27944 return 2;
27945
27946 /* Thumb2 mode. */
27947 regno = REGNO (first_op);
27948 /* For PUSH/STM under Thumb-2, we can use the 16-bit encodings if the register
27949 list is 8-bit. Normally this means all registers in the list must be
27950 LO_REGS, that is R0-R7. If any HI_REGS are used, we must use the 32-bit
27951 encodings. The one exception is PUSH, where LR (although in HI_REGS) can
27952 be used with the 16-bit encoding. */
27953 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27954 for (i = 1; i < num_saves && !hi_reg; i++)
27955 {
27956 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27957 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27958 }
27959
27960 if (!hi_reg)
27961 return 2;
27962 return 4;
27963 }
27964
27965 /* Compute the attribute "length" of an insn. Currently, this function is used
27966 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27967 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27968 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
27969 true if OPERANDS contains an insn that explicitly updates the base register. */
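/* For example, in Thumb-2 "pop {r4-r7, pc}" can use a 16-bit encoding
   (length 2), whereas an LDM with a high base register, or a register
   list containing other high registers, needs a 32-bit encoding
   (length 4).  */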
27970
27971 int
27972 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27973 {
27974 /* ARM mode. */
27975 if (TARGET_ARM)
27976 return 4;
27977 /* Thumb1 mode. */
27978 if (TARGET_THUMB1)
27979 return 2;
27980
27981 rtx parallel_op = operands[0];
27982 /* Initialize to the number of elements in the PARALLEL. */
27983 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27984 /* Initialize to the base register number. */
27985 unsigned regno = REGNO (operands[1]);
27986 /* Skip the return and write-back patterns.
27987 We only need the register pop patterns for the analysis below. */
27988 unsigned first_indx = 0;
27989 first_indx += return_pc ? 1 : 0;
27990 first_indx += write_back_p ? 1 : 0;
27991
27992 /* A pop operation can be done through LDM or POP. If the base register is SP
27993 and write-back is used, then the LDM is an alias of POP. */
27994 bool pop_p = (regno == SP_REGNUM && write_back_p);
27995 bool ldm_p = !pop_p;
27996
27997 /* Check base register for LDM. */
27998 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27999 return 4;
28000
28001 /* Check each register in the list. */
28002 for (; indx >= first_indx; indx--)
28003 {
28004 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28005 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28006 comment in arm_attr_length_push_multi. */
28007 if (REGNO_REG_CLASS (regno) == HI_REGS
28008 && (regno != PC_REGNUM || ldm_p))
28009 return 4;
28010 }
28011
28012 return 2;
28013 }
28014
28015 /* Compute the number of instructions emitted by output_move_double. */
28016 int
28017 arm_count_output_move_double_insns (rtx *operands)
28018 {
28019 int count;
28020 rtx ops[2];
28021 /* output_move_double may modify the operands array, so call it
28022 here on a copy of the array. */
28023 ops[0] = operands[0];
28024 ops[1] = operands[1];
28025 output_move_double (ops, false, &count);
28026 return count;
28027 }
28028
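/* If OPERAND is a CONST_DOUBLE equal to 1 / 2^n for some n in [0, 31],
   return n (the number of fractional bits); otherwise return 0.  */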
28029 int
28030 vfp3_const_double_for_fract_bits (rtx operand)
28031 {
28032 REAL_VALUE_TYPE r0;
28033
28034 if (!CONST_DOUBLE_P (operand))
28035 return 0;
28036
28037 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28038 if (exact_real_inverse (DFmode, &r0)
28039 && !REAL_VALUE_NEGATIVE (r0))
28040 {
28041 if (exact_real_truncate (DFmode, &r0))
28042 {
28043 HOST_WIDE_INT value = real_to_integer (&r0);
28044 value = value & 0xffffffff;
28045 if ((value != 0) && ( (value & (value - 1)) == 0))
28046 {
28047 int ret = exact_log2 (value);
28048 gcc_assert (IN_RANGE (ret, 0, 31));
28049 return ret;
28050 }
28051 }
28052 }
28053 return 0;
28054 }
28055
28056 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28057 log2 is in [1, 32], return that log2. Otherwise return -1.
28058 This is used in the patterns for vcvt.s32.f32 floating-point to
28059 fixed-point conversions. */
28060
28061 int
28062 vfp3_const_double_for_bits (rtx x)
28063 {
28064 const REAL_VALUE_TYPE *r;
28065
28066 if (!CONST_DOUBLE_P (x))
28067 return -1;
28068
28069 r = CONST_DOUBLE_REAL_VALUE (x);
28070
28071 if (REAL_VALUE_NEGATIVE (*r)
28072 || REAL_VALUE_ISNAN (*r)
28073 || REAL_VALUE_ISINF (*r)
28074 || !real_isinteger (r, SFmode))
28075 return -1;
28076
28077 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28078
28079 /* The exact_log2 above will have returned -1 if this is
28080 not an exact log2. */
28081 if (!IN_RANGE (hwint, 1, 32))
28082 return -1;
28083
28084 return hwint;
28085 }
28086
28087 \f
28088 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28089
28090 static void
28091 arm_pre_atomic_barrier (enum memmodel model)
28092 {
28093 if (need_atomic_barrier_p (model, true))
28094 emit_insn (gen_memory_barrier ());
28095 }
28096
28097 static void
28098 arm_post_atomic_barrier (enum memmodel model)
28099 {
28100 if (need_atomic_barrier_p (model, false))
28101 emit_insn (gen_memory_barrier ());
28102 }
28103
28104 /* Emit the load-exclusive and store-exclusive instructions.
28105 Use acquire and release versions if necessary. */
28106
28107 static void
28108 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28109 {
28110 rtx (*gen) (rtx, rtx);
28111
28112 if (acq)
28113 {
28114 switch (mode)
28115 {
28116 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28117 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28118 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28119 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28120 default:
28121 gcc_unreachable ();
28122 }
28123 }
28124 else
28125 {
28126 switch (mode)
28127 {
28128 case QImode: gen = gen_arm_load_exclusiveqi; break;
28129 case HImode: gen = gen_arm_load_exclusivehi; break;
28130 case SImode: gen = gen_arm_load_exclusivesi; break;
28131 case DImode: gen = gen_arm_load_exclusivedi; break;
28132 default:
28133 gcc_unreachable ();
28134 }
28135 }
28136
28137 emit_insn (gen (rval, mem));
28138 }
28139
28140 static void
28141 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28142 rtx mem, bool rel)
28143 {
28144 rtx (*gen) (rtx, rtx, rtx);
28145
28146 if (rel)
28147 {
28148 switch (mode)
28149 {
28150 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28151 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28152 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28153 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28154 default:
28155 gcc_unreachable ();
28156 }
28157 }
28158 else
28159 {
28160 switch (mode)
28161 {
28162 case QImode: gen = gen_arm_store_exclusiveqi; break;
28163 case HImode: gen = gen_arm_store_exclusivehi; break;
28164 case SImode: gen = gen_arm_store_exclusivesi; break;
28165 case DImode: gen = gen_arm_store_exclusivedi; break;
28166 default:
28167 gcc_unreachable ();
28168 }
28169 }
28170
28171 emit_insn (gen (bval, rval, mem));
28172 }
28173
28174 /* Emit INSN as a jump and mark it as very unlikely to be taken. */
28175
28176 static void
28177 emit_unlikely_jump (rtx insn)
28178 {
28179 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28180
28181 rtx_insn *jump = emit_jump_insn (insn);
28182 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28183 }
28184
28185 /* Expand a compare and swap pattern. */
28186
28187 void
28188 arm_expand_compare_and_swap (rtx operands[])
28189 {
28190 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28191 machine_mode mode;
28192 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28193
28194 bval = operands[0];
28195 rval = operands[1];
28196 mem = operands[2];
28197 oldval = operands[3];
28198 newval = operands[4];
28199 is_weak = operands[5];
28200 mod_s = operands[6];
28201 mod_f = operands[7];
28202 mode = GET_MODE (mem);
28203
28204 /* Normally the succ memory model must be stronger than fail, but in the
28205 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28206 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28207
28208 if (TARGET_HAVE_LDACQ
28209 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28210 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28211 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28212
28213 switch (mode)
28214 {
28215 case QImode:
28216 case HImode:
28217 /* For narrow modes, we're going to perform the comparison in SImode,
28218 so do the zero-extension now. */
28219 rval = gen_reg_rtx (SImode);
28220 oldval = convert_modes (SImode, mode, oldval, true);
28221 /* FALLTHRU */
28222
28223 case SImode:
28224 /* Force the value into a register if needed. We waited until after
28225 the zero-extension above to do this properly. */
28226 if (!arm_add_operand (oldval, SImode))
28227 oldval = force_reg (SImode, oldval);
28228 break;
28229
28230 case DImode:
28231 if (!cmpdi_operand (oldval, mode))
28232 oldval = force_reg (mode, oldval);
28233 break;
28234
28235 default:
28236 gcc_unreachable ();
28237 }
28238
28239 if (TARGET_THUMB1)
28240 {
28241 switch (mode)
28242 {
28243 case QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28244 case HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28245 case SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28246 case DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28247 default:
28248 gcc_unreachable ();
28249 }
28250 }
28251 else
28252 {
28253 switch (mode)
28254 {
28255 case QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28256 case HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28257 case SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28258 case DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28259 default:
28260 gcc_unreachable ();
28261 }
28262 }
28263
28264 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28265 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28266
28267 if (mode == QImode || mode == HImode)
28268 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28269
28270 /* In all cases, we arrange for success to be signaled by Z set.
28271 This arrangement allows for the boolean result to be used directly
28272 in a subsequent branch, post optimization. For Thumb-1 targets, the
28273 boolean negation of the result is also stored in bval because the Thumb-1
28274 backend lacks dependency tracking for the CC flag, as flag setting is
28275 not represented at the RTL level. */
28276 if (TARGET_THUMB1)
28277 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28278 else
28279 {
28280 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28281 emit_insn (gen_rtx_SET (bval, x));
28282 }
28283 }
28284
28285 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28286 another memory store between the load-exclusive and store-exclusive can
28287 reset the monitor from Exclusive to Open state. This means we must wait
28288 until after reload to split the pattern, lest we get a register spill in
28289 the middle of the atomic sequence. Success of the compare and swap is
28290 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28291 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28292 atomic_compare_and_swapmode standard pattern in operand 0). */
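/* For 32-bit targets the strong form of the split sequence is,
   schematically:

     loop:  ldrex   rval, [mem]
            cmp     rval, oldval
            bne     done
            strex   neg_bval, newval, [mem]
            cmp     neg_bval, #0
            bne     loop
     done:

   with acquire/release forms of the exclusives, or explicit barriers,
   substituted as the memory model requires; the weak form omits the
   retry loop.  */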
28293
28294 void
28295 arm_split_compare_and_swap (rtx operands[])
28296 {
28297 rtx rval, mem, oldval, newval, neg_bval;
28298 machine_mode mode;
28299 enum memmodel mod_s, mod_f;
28300 bool is_weak;
28301 rtx_code_label *label1, *label2;
28302 rtx x, cond;
28303
28304 rval = operands[1];
28305 mem = operands[2];
28306 oldval = operands[3];
28307 newval = operands[4];
28308 is_weak = (operands[5] != const0_rtx);
28309 mod_s = memmodel_from_int (INTVAL (operands[6]));
28310 mod_f = memmodel_from_int (INTVAL (operands[7]));
28311 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28312 mode = GET_MODE (mem);
28313
28314 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28315
28316 bool use_acquire = TARGET_HAVE_LDACQ
28317 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28318 || is_mm_release (mod_s));
28319
28320 bool use_release = TARGET_HAVE_LDACQ
28321 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28322 || is_mm_acquire (mod_s));
28323
28324 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28325 a full barrier is emitted after the store-release. */
28326 if (is_armv8_sync)
28327 use_acquire = false;
28328
28329 /* Checks whether a barrier is needed and emits one accordingly. */
28330 if (!(use_acquire || use_release))
28331 arm_pre_atomic_barrier (mod_s);
28332
28333 label1 = NULL;
28334 if (!is_weak)
28335 {
28336 label1 = gen_label_rtx ();
28337 emit_label (label1);
28338 }
28339 label2 = gen_label_rtx ();
28340
28341 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28342
28343 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1) if oldval != rval,
28344 as required to communicate with arm_expand_compare_and_swap. */
28345 if (TARGET_32BIT)
28346 {
28347 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28348 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28349 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28350 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28351 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28352 }
28353 else
28354 {
28355 emit_move_insn (neg_bval, const1_rtx);
28356 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28357 if (thumb1_cmpneg_operand (oldval, SImode))
28358 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28359 label2, cond));
28360 else
28361 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28362 }
28363
28364 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28365
28366 /* Weak or strong, we want EQ to be true for success, so that we
28367 match the flags that we got from the compare above. */
28368 if (TARGET_32BIT)
28369 {
28370 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28371 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28372 emit_insn (gen_rtx_SET (cond, x));
28373 }
28374
28375 if (!is_weak)
28376 {
28377 /* Z is set to boolean value of !neg_bval, as required to communicate
28378 with arm_expand_compare_and_swap. */
28379 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28380 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28381 }
28382
28383 if (!is_mm_relaxed (mod_f))
28384 emit_label (label2);
28385
28386 /* Checks whether a barrier is needed and emits one accordingly. */
28387 if (is_armv8_sync
28388 || !(use_acquire || use_release))
28389 arm_post_atomic_barrier (mod_s);
28390
28391 if (is_mm_relaxed (mod_f))
28392 emit_label (label2);
28393 }
28394
28395 /* Split an atomic operation pattern. Operation is given by CODE and is one
28396 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28397 operation). Operation is performed on the content at MEM and on VALUE
28398 following the memory model MODEL_RTX. The content at MEM before and after
28399 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28400 success of the operation is returned in COND. Using a scratch register or
28401 an operand register for these determines what result is returned for that
28402 pattern. */
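/* Schematically, the split sequence is:

     loop:  ldrex   old_out, [mem]
            <code>  new_out, old_out, value
            strex   cond, new_out, [mem]
            cmp     cond, #0
            bne     loop

   bracketed by barriers, or using the acquire/release exclusives, as
   the memory model requires.  */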
28403
28404 void
28405 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28406 rtx value, rtx model_rtx, rtx cond)
28407 {
28408 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28409 machine_mode mode = GET_MODE (mem);
28410 machine_mode wmode = (mode == DImode ? DImode : SImode);
28411 rtx_code_label *label;
28412 bool all_low_regs, bind_old_new;
28413 rtx x;
28414
28415 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28416
28417 bool use_acquire = TARGET_HAVE_LDACQ
28418 && !(is_mm_relaxed (model) || is_mm_consume (model)
28419 || is_mm_release (model));
28420
28421 bool use_release = TARGET_HAVE_LDACQ
28422 && !(is_mm_relaxed (model) || is_mm_consume (model)
28423 || is_mm_acquire (model));
28424
28425 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28426 a full barrier is emitted after the store-release. */
28427 if (is_armv8_sync)
28428 use_acquire = false;
28429
28430 /* Checks whether a barrier is needed and emits one accordingly. */
28431 if (!(use_acquire || use_release))
28432 arm_pre_atomic_barrier (model);
28433
28434 label = gen_label_rtx ();
28435 emit_label (label);
28436
28437 if (new_out)
28438 new_out = gen_lowpart (wmode, new_out);
28439 if (old_out)
28440 old_out = gen_lowpart (wmode, old_out);
28441 else
28442 old_out = new_out;
28443 value = simplify_gen_subreg (wmode, value, mode, 0);
28444
28445 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28446
28447 /* Does the operation require the destination and first operand to use the same
28448 register? This is decided by the register constraints of the relevant insn
28449 patterns in thumb1.md. */
28450 gcc_assert (!new_out || REG_P (new_out));
28451 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28452 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28453 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28454 bind_old_new =
28455 (TARGET_THUMB1
28456 && code != SET
28457 && code != MINUS
28458 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28459
28460 /* We want to return the old value while putting the result of the operation
28461 in the same register as the old value so copy the old value over to the
28462 destination register and use that register for the operation. */
28463 if (old_out && bind_old_new)
28464 {
28465 emit_move_insn (new_out, old_out);
28466 old_out = new_out;
28467 }
28468
28469 switch (code)
28470 {
28471 case SET:
28472 new_out = value;
28473 break;
28474
28475 case NOT:
28476 x = gen_rtx_AND (wmode, old_out, value);
28477 emit_insn (gen_rtx_SET (new_out, x));
28478 x = gen_rtx_NOT (wmode, new_out);
28479 emit_insn (gen_rtx_SET (new_out, x));
28480 break;
28481
28482 case MINUS:
28483 if (CONST_INT_P (value))
28484 {
28485 value = GEN_INT (-INTVAL (value));
28486 code = PLUS;
28487 }
28488 /* FALLTHRU */
28489
28490 case PLUS:
28491 if (mode == DImode)
28492 {
28493 /* DImode plus/minus need to clobber flags. */
28494 /* The adddi3 and subdi3 patterns are incorrectly written so that
28495 they require matching operands, even when we could easily support
28496 three operands. Thankfully, this can be fixed up post-splitting,
28497 as the individual add+adc patterns do accept three operands and
28498 post-reload cprop can make these moves go away. */
28499 emit_move_insn (new_out, old_out);
28500 if (code == PLUS)
28501 x = gen_adddi3 (new_out, new_out, value);
28502 else
28503 x = gen_subdi3 (new_out, new_out, value);
28504 emit_insn (x);
28505 break;
28506 }
28507 /* FALLTHRU */
28508
28509 default:
28510 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28511 emit_insn (gen_rtx_SET (new_out, x));
28512 break;
28513 }
28514
28515 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28516 use_release);
28517
28518 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28519 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28520
28521 /* Checks whether a barrier is needed and emits one accordingly. */
28522 if (is_armv8_sync
28523 || !(use_acquire || use_release))
28524 arm_post_atomic_barrier (model);
28525 }
28526 \f
28527 #define MAX_VECT_LEN 16
28528
28529 struct expand_vec_perm_d
28530 {
28531 rtx target, op0, op1;
28532 unsigned char perm[MAX_VECT_LEN];
28533 machine_mode vmode;
28534 unsigned char nelt;
28535 bool one_vector_p;
28536 bool testing_p;
28537 };
28538
28539 /* Generate a variable permutation. */
28540
28541 static void
28542 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28543 {
28544 machine_mode vmode = GET_MODE (target);
28545 bool one_vector_p = rtx_equal_p (op0, op1);
28546
28547 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28548 gcc_checking_assert (GET_MODE (op0) == vmode);
28549 gcc_checking_assert (GET_MODE (op1) == vmode);
28550 gcc_checking_assert (GET_MODE (sel) == vmode);
28551 gcc_checking_assert (TARGET_NEON);
28552
28553 if (one_vector_p)
28554 {
28555 if (vmode == V8QImode)
28556 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28557 else
28558 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28559 }
28560 else
28561 {
28562 rtx pair;
28563
28564 if (vmode == V8QImode)
28565 {
28566 pair = gen_reg_rtx (V16QImode);
28567 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28568 pair = gen_lowpart (TImode, pair);
28569 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28570 }
28571 else
28572 {
28573 pair = gen_reg_rtx (OImode);
28574 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28575 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28576 }
28577 }
28578 }
28579
28580 void
28581 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28582 {
28583 machine_mode vmode = GET_MODE (target);
28584 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28585 bool one_vector_p = rtx_equal_p (op0, op1);
28586 rtx rmask[MAX_VECT_LEN], mask;
28587
28588 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28589 numbering of elements for big-endian, we must reverse the order. */
28590 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28591
28592 /* The VTBL instruction does not use a modulo index, so we must take care
28593 of that ourselves. */
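  /* E.g. for V8QImode with a single input vector the mask is 7, so an
     out-of-range selector byte wraps around to a valid lane rather than
     yielding zero as VTBL would.  */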
28594 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28595 for (i = 0; i < nelt; ++i)
28596 rmask[i] = mask;
28597 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28598 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28599
28600 arm_expand_vec_perm_1 (target, op0, op1, sel);
28601 }
28602
28603 /* Map lane ordering between architectural lane order and GCC lane order,
28604 taking the ABI into account. See the comment above output_move_neon for details. */
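/* For example, for a V4SImode value on a big-endian target the mapping
   is 0<->1 and 2<->3: lanes are reversed within the vector and the two
   D registers are then swapped back to match the ABI.  */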
28605
28606 static int
28607 neon_endian_lane_map (machine_mode mode, int lane)
28608 {
28609 if (BYTES_BIG_ENDIAN)
28610 {
28611 int nelems = GET_MODE_NUNITS (mode);
28612 /* Reverse lane order. */
28613 lane = (nelems - 1 - lane);
28614 /* Reverse D register order, to match ABI. */
28615 if (GET_MODE_SIZE (mode) == 16)
28616 lane = lane ^ (nelems / 2);
28617 }
28618 return lane;
28619 }
28620
28621 /* Some permutations index into pairs of vectors; this is a helper function
28622 to map indexes into those pairs of vectors. */
28623
28624 static int
28625 neon_pair_endian_lane_map (machine_mode mode, int lane)
28626 {
28627 int nelem = GET_MODE_NUNITS (mode);
28628 if (BYTES_BIG_ENDIAN)
28629 lane =
28630 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28631 return lane;
28632 }
28633
28634 /* Generate or test for an insn that supports a constant permutation. */
28635
28636 /* Recognize patterns for the VUZP insns. */
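/* VUZP "un-zips" the concatenation of the two input vectors, e.g. for
   V4HImode (little-endian numbering) the recognized permutations are
   { 0 2 4 6 } (even lanes) and { 1 3 5 7 } (odd lanes).  */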
28637
28638 static bool
28639 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28640 {
28641 unsigned int i, odd, mask, nelt = d->nelt;
28642 rtx out0, out1, in0, in1;
28643 rtx (*gen)(rtx, rtx, rtx, rtx);
28644 int first_elem;
28645 int swap_nelt;
28646
28647 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28648 return false;
28649
28650 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28651 big-endian pattern on 64-bit vectors, so we correct for that. */
28652 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28653 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28654
28655 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28656
28657 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28658 odd = 0;
28659 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28660 odd = 1;
28661 else
28662 return false;
28663 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28664
28665 for (i = 0; i < nelt; i++)
28666 {
28667 unsigned elt =
28668 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28669 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28670 return false;
28671 }
28672
28673 /* Success! */
28674 if (d->testing_p)
28675 return true;
28676
28677 switch (d->vmode)
28678 {
28679 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28680 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28681 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28682 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28683 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28684 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28685 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28686 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28687 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28688 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28689 default:
28690 gcc_unreachable ();
28691 }
28692
28693 in0 = d->op0;
28694 in1 = d->op1;
28695 if (swap_nelt != 0)
28696 std::swap (in0, in1);
28697
28698 out0 = d->target;
28699 out1 = gen_reg_rtx (d->vmode);
28700 if (odd)
28701 std::swap (out0, out1);
28702
28703 emit_insn (gen (out0, in0, in1, out1));
28704 return true;
28705 }
28706
28707 /* Recognize patterns for the VZIP insns. */
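/* VZIP interleaves the two input vectors, e.g. for V4HImode
   (little-endian numbering) the recognized permutations are
   { 0 4 1 5 } (low halves) and { 2 6 3 7 } (high halves).  */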
28708
28709 static bool
28710 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28711 {
28712 unsigned int i, high, mask, nelt = d->nelt;
28713 rtx out0, out1, in0, in1;
28714 rtx (*gen)(rtx, rtx, rtx, rtx);
28715 int first_elem;
28716 bool is_swapped;
28717
28718 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28719 return false;
28720
28721 is_swapped = BYTES_BIG_ENDIAN;
28722
28723 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28724
28725 high = nelt / 2;
28726 if (first_elem == neon_endian_lane_map (d->vmode, high))
28727 ;
28728 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28729 high = 0;
28730 else
28731 return false;
28732 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28733
28734 for (i = 0; i < nelt / 2; i++)
28735 {
28736 unsigned elt =
28737 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28738 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28739 != elt)
28740 return false;
28741 elt =
28742 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28743 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28744 != elt)
28745 return false;
28746 }
28747
28748 /* Success! */
28749 if (d->testing_p)
28750 return true;
28751
28752 switch (d->vmode)
28753 {
28754 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28755 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28756 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28757 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28758 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28759 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28760 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28761 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28762 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28763 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28764 default:
28765 gcc_unreachable ();
28766 }
28767
28768 in0 = d->op0;
28769 in1 = d->op1;
28770 if (is_swapped)
28771 std::swap (in0, in1);
28772
28773 out0 = d->target;
28774 out1 = gen_reg_rtx (d->vmode);
28775 if (high)
28776 std::swap (out0, out1);
28777
28778 emit_insn (gen (out0, in0, in1, out1));
28779 return true;
28780 }
28781
28782 /* Recognize patterns for the VREV insns. */
28783
28784 static bool
28785 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28786 {
28787 unsigned int i, j, diff, nelt = d->nelt;
28788 rtx (*gen)(rtx, rtx);
28789
28790 if (!d->one_vector_p)
28791 return false;
28792
28793 diff = d->perm[0];
28794 switch (diff)
28795 {
28796 case 7:
28797 switch (d->vmode)
28798 {
28799 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28800 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28801 default:
28802 return false;
28803 }
28804 break;
28805 case 3:
28806 switch (d->vmode)
28807 {
28808 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28809 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28810 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28811 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28812 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28813 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28814 default:
28815 return false;
28816 }
28817 break;
28818 case 1:
28819 switch (d->vmode)
28820 {
28821 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28822 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28823 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28824 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28825 case V4SImode: gen = gen_neon_vrev64v4si; break;
28826 case V2SImode: gen = gen_neon_vrev64v2si; break;
28827 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28828 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28829 default:
28830 return false;
28831 }
28832 break;
28833 default:
28834 return false;
28835 }
28836
28837 for (i = 0; i < nelt ; i += diff + 1)
28838 for (j = 0; j <= diff; j += 1)
28839 {
28840 /* This is guaranteed to hold, as the value of diff
28841 is 7, 3 or 1 and the vector always has enough elements
28842 left to cover this group. Getting a vector mask whose
28843 diff is anything other than these values implies that
28844 something went wrong by the time we get here. */
28845 gcc_assert (i + j < nelt);
28846 if (d->perm[i + j] != i + diff - j)
28847 return false;
28848 }
28849
28850 /* Success! */
28851 if (d->testing_p)
28852 return true;
28853
28854 emit_insn (gen (d->target, d->op0));
28855 return true;
28856 }
28857
28858 /* Recognize patterns for the VTRN insns. */
28859
28860 static bool
28861 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28862 {
28863 unsigned int i, odd, mask, nelt = d->nelt;
28864 rtx out0, out1, in0, in1;
28865 rtx (*gen)(rtx, rtx, rtx, rtx);
28866
28867 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28868 return false;
28869
28870 /* Note that these are little-endian tests. Adjust for big-endian later. */
28871 if (d->perm[0] == 0)
28872 odd = 0;
28873 else if (d->perm[0] == 1)
28874 odd = 1;
28875 else
28876 return false;
28877 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28878
28879 for (i = 0; i < nelt; i += 2)
28880 {
28881 if (d->perm[i] != i + odd)
28882 return false;
28883 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28884 return false;
28885 }
28886
28887 /* Success! */
28888 if (d->testing_p)
28889 return true;
28890
28891 switch (d->vmode)
28892 {
28893 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28894 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28895 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28896 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28897 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28898 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28899 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28900 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28901 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28902 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28903 default:
28904 gcc_unreachable ();
28905 }
28906
28907 in0 = d->op0;
28908 in1 = d->op1;
28909 if (BYTES_BIG_ENDIAN)
28910 {
28911 std::swap (in0, in1);
28912 odd = !odd;
28913 }
28914
28915 out0 = d->target;
28916 out1 = gen_reg_rtx (d->vmode);
28917 if (odd)
28918 std::swap (out0, out1);
28919
28920 emit_insn (gen (out0, in0, in1, out1));
28921 return true;
28922 }
28923
28924 /* Recognize patterns for the VEXT insns. */
28925
28926 static bool
28927 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28928 {
28929 unsigned int i, nelt = d->nelt;
28930 rtx (*gen) (rtx, rtx, rtx, rtx);
28931 rtx offset;
28932
28933 unsigned int location;
28934
28935 unsigned int next = d->perm[0] + 1;
28936
28937 /* TODO: Handle GCC's numbering of elements for big-endian. */
28938 if (BYTES_BIG_ENDIAN)
28939 return false;
28940
28941 /* Check if the extracted indexes are increasing by one. */
28942 for (i = 1; i < nelt; next++, i++)
28943 {
28944 /* If we hit the most significant element of the 2nd vector in
28945 the previous iteration, no need to test further. */
28946 if (next == 2 * nelt)
28947 return false;
28948
28949 /* If we are operating on only one vector: it could be a
28950 rotation. If there are only two elements of size < 64, let
28951 arm_evpc_neon_vrev catch it. */
28952 if (d->one_vector_p && (next == nelt))
28953 {
28954 if ((nelt == 2) && (d->vmode != V2DImode))
28955 return false;
28956 else
28957 next = 0;
28958 }
28959
28960 if (d->perm[i] != next)
28961 return false;
28962 }
28963
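/* The first selected index becomes the immediate operand of VEXT.  */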
28964 location = d->perm[0];
28965
28966 switch (d->vmode)
28967 {
28968 case V16QImode: gen = gen_neon_vextv16qi; break;
28969 case V8QImode: gen = gen_neon_vextv8qi; break;
28970 case V4HImode: gen = gen_neon_vextv4hi; break;
28971 case V8HImode: gen = gen_neon_vextv8hi; break;
28972 case V2SImode: gen = gen_neon_vextv2si; break;
28973 case V4SImode: gen = gen_neon_vextv4si; break;
28974 case V4HFmode: gen = gen_neon_vextv4hf; break;
28975 case V8HFmode: gen = gen_neon_vextv8hf; break;
28976 case V2SFmode: gen = gen_neon_vextv2sf; break;
28977 case V4SFmode: gen = gen_neon_vextv4sf; break;
28978 case V2DImode: gen = gen_neon_vextv2di; break;
28979 default:
28980 return false;
28981 }
28982
28983 /* Success! */
28984 if (d->testing_p)
28985 return true;
28986
28987 offset = GEN_INT (location);
28988 emit_insn (gen (d->target, d->op0, d->op1, offset));
28989 return true;
28990 }
28991
28992 /* The NEON VTBL instruction is a fully variable permutation that's even
28993 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28994 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28995 can do slightly better by expanding this as a constant where we don't
28996 have to apply a mask. */
28997
28998 static bool
28999 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29000 {
29001 rtx rperm[MAX_VECT_LEN], sel;
29002 machine_mode vmode = d->vmode;
29003 unsigned int i, nelt = d->nelt;
29004
29005 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29006 numbering of elements for big-endian, we must reverse the order. */
29007 if (BYTES_BIG_ENDIAN)
29008 return false;
29009
29010 if (d->testing_p)
29011 return true;
29012
29013 /* Generic code will try constant permutation twice. Once with the
29014 original mode and again with the elements lowered to QImode.
29015 So wait and don't do the selector expansion ourselves. */
29016 if (vmode != V8QImode && vmode != V16QImode)
29017 return false;
29018
29019 for (i = 0; i < nelt; ++i)
29020 rperm[i] = GEN_INT (d->perm[i]);
29021 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29022 sel = force_reg (vmode, sel);
29023
29024 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29025 return true;
29026 }
29027
29028 static bool
29029 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29030 {
29031 /* Check if the input mask matches vext before reordering the
29032 operands. */
29033 if (TARGET_NEON)
29034 if (arm_evpc_neon_vext (d))
29035 return true;
29036
29037 /* The pattern matching functions above are written to look for a small
29038 number to begin the sequence (0, 1, N/2). If we begin with an index
29039 from the second operand, we can swap the operands. */
29040 if (d->perm[0] >= d->nelt)
29041 {
29042 unsigned i, nelt = d->nelt;
29043
29044 for (i = 0; i < nelt; ++i)
29045 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29046
29047 std::swap (d->op0, d->op1);
29048 }
29049
29050 if (TARGET_NEON)
29051 {
29052 if (arm_evpc_neon_vuzp (d))
29053 return true;
29054 if (arm_evpc_neon_vzip (d))
29055 return true;
29056 if (arm_evpc_neon_vrev (d))
29057 return true;
29058 if (arm_evpc_neon_vtrn (d))
29059 return true;
29060 return arm_evpc_neon_vtbl (d);
29061 }
29062 return false;
29063 }
29064
29065 /* Expand a vec_perm_const pattern. */
29066
29067 bool
29068 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29069 {
29070 struct expand_vec_perm_d d;
29071 int i, nelt, which;
29072
29073 d.target = target;
29074 d.op0 = op0;
29075 d.op1 = op1;
29076
29077 d.vmode = GET_MODE (target);
29078 gcc_assert (VECTOR_MODE_P (d.vmode));
29079 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29080 d.testing_p = false;
29081
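/* Walk the selector, recording each element and noting in WHICH
   whether it refers to the first input vector (bit 0) or the
   second (bit 1).  */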
29082 for (i = which = 0; i < nelt; ++i)
29083 {
29084 rtx e = XVECEXP (sel, 0, i);
29085 int ei = INTVAL (e) & (2 * nelt - 1);
29086 which |= (ei < nelt ? 1 : 2);
29087 d.perm[i] = ei;
29088 }
29089
29090 switch (which)
29091 {
29092 default:
29093 gcc_unreachable();
29094
29095 case 3:
29096 d.one_vector_p = false;
29097 if (!rtx_equal_p (op0, op1))
29098 break;
29099
29100 /* The elements of PERM do not suggest that only the first operand
29101 is used, but both operands are identical. Allow easier matching
29102 of the permutation by folding the permutation into the single
29103 input vector. */
29104 /* FALLTHRU */
29105 case 2:
29106 for (i = 0; i < nelt; ++i)
29107 d.perm[i] &= nelt - 1;
29108 d.op0 = op1;
29109 d.one_vector_p = true;
29110 break;
29111
29112 case 1:
29113 d.op1 = op0;
29114 d.one_vector_p = true;
29115 break;
29116 }
29117
29118 return arm_expand_vec_perm_const_1 (&d);
29119 }
29120
29121 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29122
29123 static bool
29124 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29125 const unsigned char *sel)
29126 {
29127 struct expand_vec_perm_d d;
29128 unsigned int i, nelt, which;
29129 bool ret;
29130
29131 d.vmode = vmode;
29132 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29133 d.testing_p = true;
29134 memcpy (d.perm, sel, nelt);
29135
29136 /* Categorize the set of elements in the selector. */
29137 for (i = which = 0; i < nelt; ++i)
29138 {
29139 unsigned char e = d.perm[i];
29140 gcc_assert (e < 2 * nelt);
29141 which |= (e < nelt ? 1 : 2);
29142 }
29143
29144 /* If all elements come from the second vector, fold them onto the first. */
29145 if (which == 2)
29146 for (i = 0; i < nelt; ++i)
29147 d.perm[i] -= nelt;
29148
29149 /* Check whether the mask can be applied to the vector type. */
29150 d.one_vector_p = (which != 3);
29151
29152 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29153 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29154 if (!d.one_vector_p)
29155 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29156
29157 start_sequence ();
29158 ret = arm_expand_vec_perm_const_1 (&d);
29159 end_sequence ();
29160
29161 return ret;
29162 }
29163
29164 bool
29165 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29166 {
29167 /* If we are soft float and we either have ldrd or the mode fits in
29168 a single word, then all auto increment forms are ok. */
29169 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29170 return true;
29171
29172 switch (code)
29173 {
29174 /* Post increment and pre decrement are supported for all instruction
29175 forms, except that pre decrement is not available for vector modes. */
29176 case ARM_POST_INC:
29177 case ARM_PRE_DEC:
29178 if (VECTOR_MODE_P (mode))
29179 {
29180 if (code != ARM_PRE_DEC)
29181 return true;
29182 else
29183 return false;
29184 }
29185
29186 return true;
29187
29188 case ARM_POST_DEC:
29189 case ARM_PRE_INC:
29190 /* Without LDRD and mode size greater than
29191 word size, there is no point in auto-incrementing
29192 because ldm and stm will not have these forms. */
29193 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29194 return false;
29195
29196 /* Vector and floating point modes do not support
29197 these auto increment forms. */
29198 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29199 return false;
29200
29201 return true;
29202
29203 default:
29204 return false;
29205
29206 }
29207
29208 return false;
29209 }
29210
29211 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29212 on ARM, since we know that shifts by negative amounts are no-ops.
29213 Additionally, the default expansion code is not available or suitable
29214 for post-reload insn splits (this can occur when the register allocator
29215 chooses not to do a shift in NEON).
29216
29217 This function is used in both initial expand and post-reload splits, and
29218 handles all kinds of 64-bit shifts.
29219
29220 Input requirements:
29221 - It is safe for the input and output to be the same register, but
29222 early-clobber rules apply for the shift amount and scratch registers.
29223 - Shift by register requires both scratch registers. In all other cases
29224 the scratch registers may be NULL.
29225 - Ashiftrt by a register also clobbers the CC register. */
29226 void
29227 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29228 rtx amount, rtx scratch1, rtx scratch2)
29229 {
29230 rtx out_high = gen_highpart (SImode, out);
29231 rtx out_low = gen_lowpart (SImode, out);
29232 rtx in_high = gen_highpart (SImode, in);
29233 rtx in_low = gen_lowpart (SImode, in);
29234
29235 /* Terminology:
29236 in = the register pair containing the input value.
29237 out = the destination register pair.
29238 up = the high- or low-part of each pair.
29239 down = the opposite part to "up".
29240 In a shift, we can consider bits to shift from "up"-stream to
29241 "down"-stream, so in a left-shift "up" is the low-part and "down"
29242 is the high-part of each register pair. */
29243
29244 rtx out_up = code == ASHIFT ? out_low : out_high;
29245 rtx out_down = code == ASHIFT ? out_high : out_low;
29246 rtx in_up = code == ASHIFT ? in_low : in_high;
29247 rtx in_down = code == ASHIFT ? in_high : in_low;
29248
29249 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29250 gcc_assert (out
29251 && (REG_P (out) || GET_CODE (out) == SUBREG)
29252 && GET_MODE (out) == DImode);
29253 gcc_assert (in
29254 && (REG_P (in) || GET_CODE (in) == SUBREG)
29255 && GET_MODE (in) == DImode);
29256 gcc_assert (amount
29257 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29258 && GET_MODE (amount) == SImode)
29259 || CONST_INT_P (amount)));
29260 gcc_assert (scratch1 == NULL
29261 || (GET_CODE (scratch1) == SCRATCH)
29262 || (GET_MODE (scratch1) == SImode
29263 && REG_P (scratch1)));
29264 gcc_assert (scratch2 == NULL
29265 || (GET_CODE (scratch2) == SCRATCH)
29266 || (GET_MODE (scratch2) == SImode
29267 && REG_P (scratch2)));
29268 gcc_assert (!REG_P (out) || !REG_P (amount)
29269 || !HARD_REGISTER_P (out)
29270 || (REGNO (out) != REGNO (amount)
29271 && REGNO (out) + 1 != REGNO (amount)));
29272
29273 /* Macros to make following code more readable. */
29274 #define SUB_32(DEST,SRC) \
29275 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29276 #define RSB_32(DEST,SRC) \
29277 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29278 #define SUB_S_32(DEST,SRC) \
29279 gen_addsi3_compare0 ((DEST), (SRC), \
29280 GEN_INT (-32))
29281 #define SET(DEST,SRC) \
29282 gen_rtx_SET ((DEST), (SRC))
29283 #define SHIFT(CODE,SRC,AMOUNT) \
29284 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29285 #define LSHIFT(CODE,SRC,AMOUNT) \
29286 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29287 SImode, (SRC), (AMOUNT))
29288 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29289 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29290 SImode, (SRC), (AMOUNT))
29291 #define ORR(A,B) \
29292 gen_rtx_IOR (SImode, (A), (B))
29293 #define BRANCH(COND,LABEL) \
29294 gen_arm_cond_branch ((LABEL), \
29295 gen_rtx_ ## COND (CCmode, cc_reg, \
29296 const0_rtx), \
29297 cc_reg)
29298
29299 /* Shifts by register and shifts by constant are handled separately. */
29300 if (CONST_INT_P (amount))
29301 {
29302 /* We have a shift-by-constant. */
29303
29304 /* First, handle out-of-range shift amounts.
29305 In both cases we try to match the result that an ARM instruction in a
29306 shift-by-register would give. This helps reduce execution
29307 differences between optimization levels, but it won't stop other
29308 parts of the compiler doing different things. This is "undefined"
29309 behavior, in any case. */
29310 if (INTVAL (amount) <= 0)
29311 emit_insn (gen_movdi (out, in));
29312 else if (INTVAL (amount) >= 64)
29313 {
29314 if (code == ASHIFTRT)
29315 {
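/* An arithmetic right shift by 64 or more leaves every bit equal to
   the sign bit, so both output words are set to in_up >> 31.  */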
29316 rtx const31_rtx = GEN_INT (31);
29317 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29318 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29319 }
29320 else
29321 emit_insn (gen_movdi (out, const0_rtx));
29322 }
29323
29324 /* Now handle valid shifts. */
29325 else if (INTVAL (amount) < 32)
29326 {
29327 /* Shifts by a constant less than 32. */
29328 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29329
29330 /* Clearing the out register in DImode first avoids lots
29331 of spilling and results in less stack usage.
29332 Later this redundant insn is completely removed.
29333 Do that only if "in" and "out" are different registers. */
29334 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29335 emit_insn (SET (out, const0_rtx));
29336 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29337 emit_insn (SET (out_down,
29338 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29339 out_down)));
29340 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29341 }
29342 else
29343 {
29344 /* Shifts by a constant greater than 31. */
29345 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29346
29347 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29348 emit_insn (SET (out, const0_rtx));
29349 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29350 if (code == ASHIFTRT)
29351 emit_insn (gen_ashrsi3 (out_up, in_up,
29352 GEN_INT (31)));
29353 else
29354 emit_insn (SET (out_up, const0_rtx));
29355 }
29356 }
29357 else
29358 {
29359 /* We have a shift-by-register. */
29360 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29361
29362 /* This alternative requires the scratch registers. */
29363 gcc_assert (scratch1 && REG_P (scratch1));
29364 gcc_assert (scratch2 && REG_P (scratch2));
29365
29366 /* We will need the values "amount-32" and "32-amount" later.
29367 Swapping them around now allows the later code to be more general. */
29368 switch (code)
29369 {
29370 case ASHIFT:
29371 emit_insn (SUB_32 (scratch1, amount));
29372 emit_insn (RSB_32 (scratch2, amount));
29373 break;
29374 case ASHIFTRT:
29375 emit_insn (RSB_32 (scratch1, amount));
29376 /* Also set CC = amount > 32. */
29377 emit_insn (SUB_S_32 (scratch2, amount));
29378 break;
29379 case LSHIFTRT:
29380 emit_insn (RSB_32 (scratch1, amount));
29381 emit_insn (SUB_32 (scratch2, amount));
29382 break;
29383 default:
29384 gcc_unreachable ();
29385 }
29386
29387 /* Emit code like this:
29388
29389 arithmetic-left:
29390 out_down = in_down << amount;
29391 out_down = (in_up << (amount - 32)) | out_down;
29392 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29393 out_up = in_up << amount;
29394
29395 arithmetic-right:
29396 out_down = in_down >> amount;
29397 out_down = (in_up << (32 - amount)) | out_down;
29398 if (amount < 32)
29399 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29400 out_up = in_up >> amount;
29401
29402 logical-right:
29403 out_down = in_down >> amount;
29404 out_down = (in_up << (32 - amount)) | out_down;
29405 if (amount < 32)
29406 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29407 out_up = in_up >> amount;
29408
29409 The ARM and Thumb2 variants are the same but implemented slightly
29410 differently. If this were only called during expand we could just
29411 use the Thumb2 case and let combine do the right thing, but this
29412 can also be called from post-reload splitters. */
29413
29414 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29415
29416 if (!TARGET_THUMB2)
29417 {
29418 /* Emit code for ARM mode. */
29419 emit_insn (SET (out_down,
29420 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29421 if (code == ASHIFTRT)
29422 {
29423 rtx_code_label *done_label = gen_label_rtx ();
29424 emit_jump_insn (BRANCH (LT, done_label));
29425 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29426 out_down)));
29427 emit_label (done_label);
29428 }
29429 else
29430 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29431 out_down)));
29432 }
29433 else
29434 {
29435 /* Emit code for Thumb2 mode.
29436 Thumb2 can't do shift and or in one insn. */
29437 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29438 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29439
29440 if (code == ASHIFTRT)
29441 {
29442 rtx_code_label *done_label = gen_label_rtx ();
29443 emit_jump_insn (BRANCH (LT, done_label));
29444 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29445 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29446 emit_label (done_label);
29447 }
29448 else
29449 {
29450 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29451 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29452 }
29453 }
29454
29455 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29456 }
29457
29458 #undef SUB_32
29459 #undef RSB_32
29460 #undef SUB_S_32
29461 #undef SET
29462 #undef SHIFT
29463 #undef LSHIFT
29464 #undef REV_LSHIFT
29465 #undef ORR
29466 #undef BRANCH
29467 }
29468
29469 /* Returns true if the pattern is a valid symbolic address, which is either a
29470 symbol_ref or (symbol_ref + addend).
29471
29472 According to the ARM ELF ABI, the initial addend of REL-type relocations
29473 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29474 literal field of the instruction as a 16-bit signed value in the range
29475 -32768 <= A < 32768. */
29476
29477 bool
29478 arm_valid_symbolic_address_p (rtx addr)
29479 {
29480 rtx xop0, xop1 = NULL_RTX;
29481 rtx tmp = addr;
29482
29483 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29484 return true;
29485
29486 /* (const (plus: symbol_ref const_int)) */
29487 if (GET_CODE (addr) == CONST)
29488 tmp = XEXP (addr, 0);
29489
29490 if (GET_CODE (tmp) == PLUS)
29491 {
29492 xop0 = XEXP (tmp, 0);
29493 xop1 = XEXP (tmp, 1);
29494
29495 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29496 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29497 }
29498
29499 return false;
29500 }
29501
29502 /* Return true if *COMPARISON is a comparison the target can handle,
29503 and put the operands into a form that is valid for it. */
29504 bool
29505 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29506 {
29507 enum rtx_code code = GET_CODE (*comparison);
29508 int code_int;
29509 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29510 ? GET_MODE (*op2) : GET_MODE (*op1);
29511
29512 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29513
29514 if (code == UNEQ || code == LTGT)
29515 return false;
29516
29517 code_int = (int)code;
29518 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29519 PUT_CODE (*comparison, (enum rtx_code)code_int);
29520
29521 switch (mode)
29522 {
29523 case SImode:
29524 if (!arm_add_operand (*op1, mode))
29525 *op1 = force_reg (mode, *op1);
29526 if (!arm_add_operand (*op2, mode))
29527 *op2 = force_reg (mode, *op2);
29528 return true;
29529
29530 case DImode:
29531 if (!cmpdi_operand (*op1, mode))
29532 *op1 = force_reg (mode, *op1);
29533 if (!cmpdi_operand (*op2, mode))
29534 *op2 = force_reg (mode, *op2);
29535 return true;
29536
29537 case HFmode:
29538 if (!TARGET_VFP_FP16INST)
29539 break;
29540 /* FP16 comparisons are done in SF mode. */
29541 mode = SFmode;
29542 *op1 = convert_to_mode (mode, *op1, 1);
29543 *op2 = convert_to_mode (mode, *op2, 1);
29544 /* Fall through. */
29545 case SFmode:
29546 case DFmode:
29547 if (!vfp_compare_operand (*op1, mode))
29548 *op1 = force_reg (mode, *op1);
29549 if (!vfp_compare_operand (*op2, mode))
29550 *op2 = force_reg (mode, *op2);
29551 return true;
29552 default:
29553 break;
29554 }
29555
29556 return false;
29557
29558 }
29559
29560 /* Maximum number of instructions to set block of memory. */
29561 static int
29562 arm_block_set_max_insns (void)
29563 {
29564 if (optimize_function_for_size_p (cfun))
29565 return 4;
29566 else
29567 return current_tune->max_insns_inline_memset;
29568 }
29569
29570 /* Return TRUE if it's profitable to set block of memory for
29571 non-vectorized case. VAL is the value to set the memory
29572 with. LENGTH is the number of bytes to set. ALIGN is the
29573 alignment of the destination memory in bytes. UNALIGNED_P
29574 is TRUE if we can only set the memory with instructions
29575 meeting alignment requirements. USE_STRD_P is TRUE if we
29576 can use strd to set the memory. */
29577 static bool
29578 arm_block_set_non_vect_profit_p (rtx val,
29579 unsigned HOST_WIDE_INT length,
29580 unsigned HOST_WIDE_INT align,
29581 bool unaligned_p, bool use_strd_p)
29582 {
29583 int num = 0;
29584 /* For a leftover of 0-7 bytes, this table gives the minimum number of
29585 strb/strh/str instructions needed to store it. */
29586 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29587
29588 if (unaligned_p)
29589 {
29590 num = arm_const_inline_cost (SET, val);
29591 num += length / align + length % align;
29592 }
29593 else if (use_strd_p)
29594 {
29595 num = arm_const_double_inline_cost (val);
29596 num += (length >> 3) + leftover[length & 7];
29597 }
29598 else
29599 {
29600 num = arm_const_inline_cost (SET, val);
29601 num += (length >> 2) + leftover[length & 3];
29602 }
29603
29604 /* We may be able to combine last pair STRH/STRB into a single STR
29605 by shifting one byte back. */
29606 if (unaligned_access && length > 3 && (length & 3) == 3)
29607 num--;
29608
29609 return (num <= arm_block_set_max_insns ());
29610 }
29611
29612 /* Return TRUE if it's profitable to set block of memory for
29613 vectorized case. LENGTH is the number of bytes to set.
29614 ALIGN is the alignment of destination memory in bytes.
29615 MODE is the vector mode used to set the memory. */
29616 static bool
29617 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29618 unsigned HOST_WIDE_INT align,
29619 machine_mode mode)
29620 {
29621 int num;
29622 bool unaligned_p = ((align & 3) != 0);
29623 unsigned int nelt = GET_MODE_NUNITS (mode);
29624
29625 /* Instruction loading constant value. */
29626 num = 1;
29627 /* Instructions storing the memory. */
29628 num += (length + nelt - 1) / nelt;
29629 /* Instructions adjusting the address expression. We only need to
29630 adjust the address if the destination is 4-byte aligned and the
29631 leftover bytes must be stored with a misaligned store instruction. */
29632 if (!unaligned_p && (length & 3) != 0)
29633 num++;
29634
29635 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29636 if (!unaligned_p && mode == V16QImode)
29637 num--;
29638
29639 return (num <= arm_block_set_max_insns ());
29640 }
29641
29642 /* Set a block of memory using vectorization instructions for the
29643 unaligned case. We fill the first LENGTH bytes of the memory
29644 area starting from DSTBASE with byte constant VALUE. ALIGN is
29645 the alignment requirement of memory. Return TRUE if succeeded. */
29646 static bool
29647 arm_block_set_unaligned_vect (rtx dstbase,
29648 unsigned HOST_WIDE_INT length,
29649 unsigned HOST_WIDE_INT value,
29650 unsigned HOST_WIDE_INT align)
29651 {
29652 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29653 rtx dst, mem;
29654 rtx val_elt, val_vec, reg;
29655 rtx rval[MAX_VECT_LEN];
29656 rtx (*gen_func) (rtx, rtx);
29657 machine_mode mode;
29658 unsigned HOST_WIDE_INT v = value;
29659 unsigned int offset = 0;
29660 gcc_assert ((align & 0x3) != 0);
29661 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29662 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29663 if (length >= nelt_v16)
29664 {
29665 mode = V16QImode;
29666 gen_func = gen_movmisalignv16qi;
29667 }
29668 else
29669 {
29670 mode = V8QImode;
29671 gen_func = gen_movmisalignv8qi;
29672 }
29673 nelt_mode = GET_MODE_NUNITS (mode);
29674 gcc_assert (length >= nelt_mode);
29675 /* Skip if it isn't profitable. */
29676 if (!arm_block_set_vect_profit_p (length, align, mode))
29677 return false;
29678
29679 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29680 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29681
29682 v = sext_hwi (v, BITS_PER_WORD);
29683 val_elt = GEN_INT (v);
29684 for (j = 0; j < nelt_mode; j++)
29685 rval[j] = val_elt;
29686
29687 reg = gen_reg_rtx (mode);
29688 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29689 /* Emit instruction loading the constant value. */
29690 emit_move_insn (reg, val_vec);
29691
29692 /* Handle nelt_mode bytes in a vector. */
29693 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29694 {
29695 emit_insn ((*gen_func) (mem, reg));
29696 if (i + 2 * nelt_mode <= length)
29697 {
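/* Advance the address only when another full vector store follows;
   the leftover cases below recompute the address themselves.  */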
29698 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29699 offset += nelt_mode;
29700 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29701 }
29702 }
29703
29704 /* If at least nelt_v8 bytes are left over, we must be operating in
29705 V16QI mode. */
29706 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29707
29708 /* Handle (8, 16) bytes leftover. */
29709 if (i + nelt_v8 < length)
29710 {
29711 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29712 offset += length - i;
29713 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29714
29715 /* We are shifting bytes back, set the alignment accordingly. */
29716 if ((length & 1) != 0 && align >= 2)
29717 set_mem_align (mem, BITS_PER_UNIT);
29718
29719 emit_insn (gen_movmisalignv16qi (mem, reg));
29720 }
29721 /* Handle (0, 8] bytes leftover. */
29722 else if (i < length && i + nelt_v8 >= length)
29723 {
29724 if (mode == V16QImode)
29725 reg = gen_lowpart (V8QImode, reg);
29726
29727 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29728 + (nelt_mode - nelt_v8))));
29729 offset += (length - i) + (nelt_mode - nelt_v8);
29730 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29731
29732 /* We are shifting bytes back, set the alignment accordingly. */
29733 if ((length & 1) != 0 && align >= 2)
29734 set_mem_align (mem, BITS_PER_UNIT);
29735
29736 emit_insn (gen_movmisalignv8qi (mem, reg));
29737 }
29738
29739 return true;
29740 }
29741
29742 /* Set a block of memory using vectorization instructions for the
29743 aligned case. We fill the first LENGTH bytes of the memory area
29744 starting from DSTBASE with byte constant VALUE. ALIGN is the
29745 alignment requirement of memory. Return TRUE if succeeded. */
29746 static bool
29747 arm_block_set_aligned_vect (rtx dstbase,
29748 unsigned HOST_WIDE_INT length,
29749 unsigned HOST_WIDE_INT value,
29750 unsigned HOST_WIDE_INT align)
29751 {
29752 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29753 rtx dst, addr, mem;
29754 rtx val_elt, val_vec, reg;
29755 rtx rval[MAX_VECT_LEN];
29756 machine_mode mode;
29757 unsigned HOST_WIDE_INT v = value;
29758 unsigned int offset = 0;
29759
29760 gcc_assert ((align & 0x3) == 0);
29761 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29762 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29763 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29764 mode = V16QImode;
29765 else
29766 mode = V8QImode;
29767
29768 nelt_mode = GET_MODE_NUNITS (mode);
29769 gcc_assert (length >= nelt_mode);
29770 /* Skip if it isn't profitable. */
29771 if (!arm_block_set_vect_profit_p (length, align, mode))
29772 return false;
29773
29774 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29775
29776 v = sext_hwi (v, BITS_PER_WORD);
29777 val_elt = GEN_INT (v);
29778 for (j = 0; j < nelt_mode; j++)
29779 rval[j] = val_elt;
29780
29781 reg = gen_reg_rtx (mode);
29782 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29783 /* Emit instruction loading the constant value. */
29784 emit_move_insn (reg, val_vec);
29785
29786 i = 0;
29787 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29788 if (mode == V16QImode)
29789 {
29790 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29791 emit_insn (gen_movmisalignv16qi (mem, reg));
29792 i += nelt_mode;
29793 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29794 if (i + nelt_v8 < length && i + nelt_v16 > length)
29795 {
29796 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29797 offset += length - nelt_mode;
29798 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29799 /* We are shifting bytes back, set the alignment accordingly. */
29800 if ((length & 0x3) == 0)
29801 set_mem_align (mem, BITS_PER_UNIT * 4);
29802 else if ((length & 0x1) == 0)
29803 set_mem_align (mem, BITS_PER_UNIT * 2);
29804 else
29805 set_mem_align (mem, BITS_PER_UNIT);
29806
29807 emit_insn (gen_movmisalignv16qi (mem, reg));
29808 return true;
29809 }
29810 /* Fall through for bytes leftover. */
29811 mode = V8QImode;
29812 nelt_mode = GET_MODE_NUNITS (mode);
29813 reg = gen_lowpart (V8QImode, reg);
29814 }
29815
29816 /* Handle 8 bytes in a vector. */
29817 for (; (i + nelt_mode <= length); i += nelt_mode)
29818 {
29819 addr = plus_constant (Pmode, dst, i);
29820 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29821 emit_move_insn (mem, reg);
29822 }
29823
29824 /* Handle single word leftover by shifting 4 bytes back. We can
29825 use aligned access for this case. */
29826 if (i + UNITS_PER_WORD == length)
29827 {
29828 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29829 offset += i - UNITS_PER_WORD;
29830 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29831 /* We are shifting 4 bytes back, set the alignment accordingly. */
29832 if (align > UNITS_PER_WORD)
29833 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29834
29835 emit_move_insn (mem, reg);
29836 }
29837 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29838 We have to use unaligned access for this case. */
29839 else if (i < length)
29840 {
29841 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29842 offset += length - nelt_mode;
29843 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29844 /* We are shifting bytes back, set the alignment accordingly. */
29845 if ((length & 1) == 0)
29846 set_mem_align (mem, BITS_PER_UNIT * 2);
29847 else
29848 set_mem_align (mem, BITS_PER_UNIT);
29849
29850 emit_insn (gen_movmisalignv8qi (mem, reg));
29851 }
29852
29853 return true;
29854 }
29855
29856 /* Set a block of memory using plain strh/strb instructions, only
29857 using instructions allowed by ALIGN on processor. We fill the
29858 first LENGTH bytes of the memory area starting from DSTBASE
29859 with byte constant VALUE. ALIGN is the alignment requirement
29860 of memory. */
29861 static bool
29862 arm_block_set_unaligned_non_vect (rtx dstbase,
29863 unsigned HOST_WIDE_INT length,
29864 unsigned HOST_WIDE_INT value,
29865 unsigned HOST_WIDE_INT align)
29866 {
29867 unsigned int i;
29868 rtx dst, addr, mem;
29869 rtx val_exp, val_reg, reg;
29870 machine_mode mode;
29871 HOST_WIDE_INT v = value;
29872
29873 gcc_assert (align == 1 || align == 2);
29874
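/* When the destination is 2-byte aligned, duplicate the byte into
   both halves of a halfword so that strh can be used.  */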
29875 if (align == 2)
29876 v |= (value << BITS_PER_UNIT);
29877
29878 v = sext_hwi (v, BITS_PER_WORD);
29879 val_exp = GEN_INT (v);
29880 /* Skip if it isn't profitable. */
29881 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29882 align, true, false))
29883 return false;
29884
29885 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29886 mode = (align == 2 ? HImode : QImode);
29887 val_reg = force_reg (SImode, val_exp);
29888 reg = gen_lowpart (mode, val_reg);
29889
29890 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29891 {
29892 addr = plus_constant (Pmode, dst, i);
29893 mem = adjust_automodify_address (dstbase, mode, addr, i);
29894 emit_move_insn (mem, reg);
29895 }
29896
29897 /* Handle single byte leftover. */
29898 if (i + 1 == length)
29899 {
29900 reg = gen_lowpart (QImode, val_reg);
29901 addr = plus_constant (Pmode, dst, i);
29902 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29903 emit_move_insn (mem, reg);
29904 i++;
29905 }
29906
29907 gcc_assert (i == length);
29908 return true;
29909 }
29910
29911 /* Set a block of memory using plain strd/str/strh/strb instructions,
29912 to permit unaligned stores on processors which support unaligned
29913 semantics for those instructions. We fill the first LENGTH bytes
29914 of the memory area starting from DSTBASE with byte constant VALUE.
29915 ALIGN is the alignment requirement of memory. */
29916 static bool
29917 arm_block_set_aligned_non_vect (rtx dstbase,
29918 unsigned HOST_WIDE_INT length,
29919 unsigned HOST_WIDE_INT value,
29920 unsigned HOST_WIDE_INT align)
29921 {
29922 unsigned int i;
29923 rtx dst, addr, mem;
29924 rtx val_exp, val_reg, reg;
29925 unsigned HOST_WIDE_INT v;
29926 bool use_strd_p;
29927
29928 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29929 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29930
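/* Replicate the byte value across a word (masking off any bytes beyond
   LENGTH when the block is shorter than a word), and below across a
   double word if strd will be used.  */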
29931 v = (value | (value << 8) | (value << 16) | (value << 24));
29932 if (length < UNITS_PER_WORD)
29933 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29934
29935 if (use_strd_p)
29936 v |= (v << BITS_PER_WORD);
29937 else
29938 v = sext_hwi (v, BITS_PER_WORD);
29939
29940 val_exp = GEN_INT (v);
29941 /* Skip if it isn't profitable. */
29942 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29943 align, false, use_strd_p))
29944 {
29945 if (!use_strd_p)
29946 return false;
29947
29948 /* Try without strd. */
29949 v = (v >> BITS_PER_WORD);
29950 v = sext_hwi (v, BITS_PER_WORD);
29951 val_exp = GEN_INT (v);
29952 use_strd_p = false;
29953 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29954 align, false, use_strd_p))
29955 return false;
29956 }
29957
29958 i = 0;
29959 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29960 /* Handle double words using strd if possible. */
29961 if (use_strd_p)
29962 {
29963 val_reg = force_reg (DImode, val_exp);
29964 reg = val_reg;
29965 for (; (i + 8 <= length); i += 8)
29966 {
29967 addr = plus_constant (Pmode, dst, i);
29968 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29969 emit_move_insn (mem, reg);
29970 }
29971 }
29972 else
29973 val_reg = force_reg (SImode, val_exp);
29974
29975 /* Handle words. */
29976 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29977 for (; (i + 4 <= length); i += 4)
29978 {
29979 addr = plus_constant (Pmode, dst, i);
29980 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29981 if ((align & 3) == 0)
29982 emit_move_insn (mem, reg);
29983 else
29984 emit_insn (gen_unaligned_storesi (mem, reg));
29985 }
29986
29987 /* Merge last pair of STRH and STRB into a STR if possible. */
29988 if (unaligned_access && i > 0 && (i + 3) == length)
29989 {
29990 addr = plus_constant (Pmode, dst, i - 1);
29991 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29992 /* We are shifting one byte back, set the alignment accordingly. */
29993 if ((align & 1) == 0)
29994 set_mem_align (mem, BITS_PER_UNIT);
29995
29996 /* Most likely this is an unaligned access, and we can't tell at
29997 compilation time. */
29998 emit_insn (gen_unaligned_storesi (mem, reg));
29999 return true;
30000 }
30001
30002 /* Handle half word leftover. */
30003 if (i + 2 <= length)
30004 {
30005 reg = gen_lowpart (HImode, val_reg);
30006 addr = plus_constant (Pmode, dst, i);
30007 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30008 if ((align & 1) == 0)
30009 emit_move_insn (mem, reg);
30010 else
30011 emit_insn (gen_unaligned_storehi (mem, reg));
30012
30013 i += 2;
30014 }
30015
30016 /* Handle single byte leftover. */
30017 if (i + 1 == length)
30018 {
30019 reg = gen_lowpart (QImode, val_reg);
30020 addr = plus_constant (Pmode, dst, i);
30021 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30022 emit_move_insn (mem, reg);
30023 }
30024
30025 return true;
30026 }
30027
30028 /* Set a block of memory using vectorization instructions for both
30029 aligned and unaligned cases. We fill the first LENGTH bytes of
30030 the memory area starting from DSTBASE with byte constant VALUE.
30031 ALIGN is the alignment requirement of memory. */
30032 static bool
30033 arm_block_set_vect (rtx dstbase,
30034 unsigned HOST_WIDE_INT length,
30035 unsigned HOST_WIDE_INT value,
30036 unsigned HOST_WIDE_INT align)
30037 {
30038 /* Check whether we need to use unaligned store instruction. */
30039 if (((align & 3) != 0 || (length & 3) != 0)
30040 /* Check whether unaligned store instruction is available. */
30041 && (!unaligned_access || BYTES_BIG_ENDIAN))
30042 return false;
30043
30044 if ((align & 3) == 0)
30045 return arm_block_set_aligned_vect (dstbase, length, value, align);
30046 else
30047 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30048 }
30049
30050 /* Expand a string store operation. First we try to do that by using
30051 vectorization instructions, then try with ARM unaligned access and
30052 double-word store if profitable. OPERANDS[0] is the destination,
30053 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30054 initialize the memory, OPERANDS[3] is the known alignment of the
30055 destination. */
30056 bool
30057 arm_gen_setmem (rtx *operands)
30058 {
30059 rtx dstbase = operands[0];
30060 unsigned HOST_WIDE_INT length;
30061 unsigned HOST_WIDE_INT value;
30062 unsigned HOST_WIDE_INT align;
30063
30064 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30065 return false;
30066
30067 length = UINTVAL (operands[1]);
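/* Only expand reasonably small blocks inline.  */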
30068 if (length > 64)
30069 return false;
30070
30071 value = (UINTVAL (operands[2]) & 0xFF);
30072 align = UINTVAL (operands[3]);
30073 if (TARGET_NEON && length >= 8
30074 && current_tune->string_ops_prefer_neon
30075 && arm_block_set_vect (dstbase, length, value, align))
30076 return true;
30077
30078 if (!unaligned_access && (align & 3) != 0)
30079 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30080
30081 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30082 }
30083
30084
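/* Return TRUE if the current tuning enables any form of instruction
   fusion.  */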
30085 static bool
30086 arm_macro_fusion_p (void)
30087 {
30088 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30089 }
30090
30091 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30092 for MOVW / MOVT macro fusion. */
30093
30094 static bool
30095 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30096 {
30097 /* We are trying to fuse
30098 movw imm / movt imm
30099 instructions as a group that gets scheduled together. */
30100
30101 rtx set_dest = SET_DEST (curr_set);
30102
30103 if (GET_MODE (set_dest) != SImode)
30104 return false;
30105
30106 /* We are trying to match:
30107 prev (movw) == (set (reg r0) (const_int imm16))
30108 curr (movt) == (set (zero_extract (reg r0)
30109 (const_int 16)
30110 (const_int 16))
30111 (const_int imm16_1))
30112 or
30113 prev (movw) == (set (reg r1)
30114 (high (symbol_ref ("SYM"))))
30115 curr (movt) == (set (reg r0)
30116 (lo_sum (reg r1)
30117 (symbol_ref ("SYM")))) */
30118
30119 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30120 {
30121 if (CONST_INT_P (SET_SRC (curr_set))
30122 && CONST_INT_P (SET_SRC (prev_set))
30123 && REG_P (XEXP (set_dest, 0))
30124 && REG_P (SET_DEST (prev_set))
30125 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30126 return true;
30127
30128 }
30129 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30130 && REG_P (SET_DEST (curr_set))
30131 && REG_P (SET_DEST (prev_set))
30132 && GET_CODE (SET_SRC (prev_set)) == HIGH
30133 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30134 return true;
30135
30136 return false;
30137 }
30138
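/* Return true if the back-to-back instructions PREV and CURR should be
   kept together during scheduling so that they can be macro-fused.  */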
30139 static bool
30140 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30141 {
30142 rtx prev_set = single_set (prev);
30143 rtx curr_set = single_set (curr);
30144
30145 if (!prev_set
30146 || !curr_set)
30147 return false;
30148
30149 if (any_condjump_p (curr))
30150 return false;
30151
30152 if (!arm_macro_fusion_p ())
30153 return false;
30154
30155 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30156 && aarch_crypto_can_dual_issue (prev, curr))
30157 return true;
30158
30159 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30160 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30161 return true;
30162
30163 return false;
30164 }
30165
30166 /* Return true iff the instruction fusion described by OP is enabled. */
30167 bool
30168 arm_fusion_enabled_p (tune_params::fuse_ops op)
30169 {
30170 return current_tune->fusible_ops & op;
30171 }
30172
30173 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30174 scheduled for speculative execution. Reject the long-running division
30175 and square-root instructions. */
30176
30177 static bool
30178 arm_sched_can_speculate_insn (rtx_insn *insn)
30179 {
30180 switch (get_attr_type (insn))
30181 {
30182 case TYPE_SDIV:
30183 case TYPE_UDIV:
30184 case TYPE_FDIVS:
30185 case TYPE_FDIVD:
30186 case TYPE_FSQRTS:
30187 case TYPE_FSQRTD:
30188 case TYPE_NEON_FP_SQRT_S:
30189 case TYPE_NEON_FP_SQRT_D:
30190 case TYPE_NEON_FP_SQRT_S_Q:
30191 case TYPE_NEON_FP_SQRT_D_Q:
30192 case TYPE_NEON_FP_DIV_S:
30193 case TYPE_NEON_FP_DIV_D:
30194 case TYPE_NEON_FP_DIV_S_Q:
30195 case TYPE_NEON_FP_DIV_D_Q:
30196 return false;
30197 default:
30198 return true;
30199 }
30200 }
30201
30202 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30203
30204 static unsigned HOST_WIDE_INT
30205 arm_asan_shadow_offset (void)
30206 {
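/* The shadow memory is placed at a fixed 0x20000000 byte offset.  */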
30207 return HOST_WIDE_INT_1U << 29;
30208 }
30209
30210
30211 /* This is a temporary fix for PR60655. Ideally we need
30212 to handle most of these cases in the generic part but
30213 currently we reject minus (..) (sym_ref). We try to
30214 ameliorate the case with minus (sym_ref1) (sym_ref2)
30215 where they are in the same section. */
30216
30217 static bool
30218 arm_const_not_ok_for_debug_p (rtx p)
30219 {
30220 tree decl_op0 = NULL;
30221 tree decl_op1 = NULL;
30222
30223 if (GET_CODE (p) == MINUS)
30224 {
30225 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30226 {
30227 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30228 if (decl_op1
30229 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30230 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30231 {
30232 if ((VAR_P (decl_op1)
30233 || TREE_CODE (decl_op1) == CONST_DECL)
30234 && (VAR_P (decl_op0)
30235 || TREE_CODE (decl_op0) == CONST_DECL))
30236 return (get_variable_section (decl_op1, false)
30237 != get_variable_section (decl_op0, false));
30238
30239 if (TREE_CODE (decl_op1) == LABEL_DECL
30240 && TREE_CODE (decl_op0) == LABEL_DECL)
30241 return (DECL_CONTEXT (decl_op1)
30242 != DECL_CONTEXT (decl_op0));
30243 }
30244
30245 return true;
30246 }
30247 }
30248
30249 return false;
30250 }
30251
30252 /* Return TRUE if X is a reference to a value in a constant pool. */
30253 extern bool
30254 arm_is_constant_pool_ref (rtx x)
30255 {
30256 return (MEM_P (x)
30257 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30258 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30259 }
30260
30261 /* Remember the last target of arm_set_current_function. */
30262 static GTY(()) tree arm_previous_fndecl;
30263
30264 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30265
30266 void
30267 save_restore_target_globals (tree new_tree)
30268 {
30269 /* If we have a previous state, use it. */
30270 if (TREE_TARGET_GLOBALS (new_tree))
30271 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30272 else if (new_tree == target_option_default_node)
30273 restore_target_globals (&default_target_globals);
30274 else
30275 {
30276 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30277 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30278 }
30279
30280 arm_option_params_internal ();
30281 }
30282
30283 /* Invalidate arm_previous_fndecl. */
30284
30285 void
30286 arm_reset_previous_fndecl (void)
30287 {
30288 arm_previous_fndecl = NULL_TREE;
30289 }
30290
30291 /* Establish appropriate back-end context for processing the function
30292 FNDECL. The argument might be NULL to indicate processing at top
30293 level, outside of any function scope. */
30294
30295 static void
30296 arm_set_current_function (tree fndecl)
30297 {
30298 if (!fndecl || fndecl == arm_previous_fndecl)
30299 return;
30300
30301 tree old_tree = (arm_previous_fndecl
30302 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30303 : NULL_TREE);
30304
30305 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30306
30307 /* If current function has no attributes but previous one did,
30308 use the default node. */
30309 if (! new_tree && old_tree)
30310 new_tree = target_option_default_node;
30311
30312 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30313 the default have been handled by save_restore_target_globals from
30314 arm_pragma_target_parse. */
30315 if (old_tree == new_tree)
30316 return;
30317
30318 arm_previous_fndecl = fndecl;
30319
30320 /* First set the target options. */
30321 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30322
30323 save_restore_target_globals (new_tree);
30324 }
30325
30326 /* Implement TARGET_OPTION_PRINT. */
30327
30328 static void
30329 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30330 {
30331 int flags = ptr->x_target_flags;
30332 const char *fpu_name;
30333
30334 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30335 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30336
30337 fprintf (file, "%*sselected arch %s\n", indent, "",
30338 TARGET_THUMB2_P (flags) ? "thumb2" :
30339 TARGET_THUMB_P (flags) ? "thumb1" :
30340 "arm");
30341
30342 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30343 }
30344
30345 /* Hook to determine if one function can safely inline another. */
30346
30347 static bool
30348 arm_can_inline_p (tree caller, tree callee)
30349 {
30350 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30351 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30352 bool can_inline = true;
30353
30354 struct cl_target_option *caller_opts
30355 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30356 : target_option_default_node);
30357
30358 struct cl_target_option *callee_opts
30359 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30360 : target_option_default_node);
30361
30362 if (callee_opts == caller_opts)
30363 return true;
30364
30365 /* Callee's ISA features should be a subset of the caller's. */
30366 struct arm_build_target caller_target;
30367 struct arm_build_target callee_target;
30368 caller_target.isa = sbitmap_alloc (isa_num_bits);
30369 callee_target.isa = sbitmap_alloc (isa_num_bits);
30370
30371 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30372 false);
30373 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30374 false);
30375 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30376 can_inline = false;
30377
30378 sbitmap_free (caller_target.isa);
30379 sbitmap_free (callee_target.isa);
30380
30381 /* OK to inline between different modes.
30382 Functions with mode-specific instructions, e.g. using asm,
30383 must be explicitly protected with noinline. */
30384 return can_inline;
30385 }
30386
30387 /* Hook to fix function's alignment affected by target attribute. */
30388
30389 static void
30390 arm_relayout_function (tree fndecl)
30391 {
30392 if (DECL_USER_ALIGN (fndecl))
30393 return;
30394
30395 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30396
30397 if (!callee_tree)
30398 callee_tree = target_option_default_node;
30399
30400 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30401 SET_DECL_ALIGN
30402 (fndecl,
30403 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30404 }
30405
30406 /* Inner function to process the attribute((target(...))), take an argument and
30407 set the current options from the argument. If we have a list, recursively
30408 go over the list. */
30409
30410 static bool
30411 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30412 {
30413 if (TREE_CODE (args) == TREE_LIST)
30414 {
30415 bool ret = true;
30416
30417 for (; args; args = TREE_CHAIN (args))
30418 if (TREE_VALUE (args)
30419 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30420 ret = false;
30421 return ret;
30422 }
30423
30424 else if (TREE_CODE (args) != STRING_CST)
30425 {
30426 error ("attribute %<target%> argument not a string");
30427 return false;
30428 }
30429
30430 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30431 char *q;
30432
30433 while ((q = strtok (argstr, ",")) != NULL)
30434 {
30435 while (ISSPACE (*q)) ++q;
30436
30437 argstr = NULL;
30438 if (!strncmp (q, "thumb", 5))
30439 opts->x_target_flags |= MASK_THUMB;
30440
30441 else if (!strncmp (q, "arm", 3))
30442 opts->x_target_flags &= ~MASK_THUMB;
30443
30444 else if (!strncmp (q, "fpu=", 4))
30445 {
30446 int fpu_index;
30447 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30448 &fpu_index, CL_TARGET))
30449 {
30450 error ("invalid fpu for attribute(target(\"%s\"))", q);
30451 return false;
30452 }
30453 if (fpu_index == TARGET_FPU_auto)
30454 {
30455 /* This doesn't really make sense until we support
30456 general dynamic selection of the architecture and all
30457 sub-features. */
30458 sorry ("auto fpu selection not currently permitted here");
30459 return false;
30460 }
30461 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30462 }
30463 else
30464 {
30465 error ("attribute(target(\"%s\")) is unknown", q);
30466 return false;
30467 }
30468 }
30469
30470 return true;
30471 }
30472
30473 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30474
30475 tree
30476 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30477 struct gcc_options *opts_set)
30478 {
30479 struct cl_target_option cl_opts;
30480
30481 if (!arm_valid_target_attribute_rec (args, opts))
30482 return NULL_TREE;
30483
30484 cl_target_option_save (&cl_opts, opts);
30485 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30486 arm_option_check_internal (opts);
30487 /* Do any overrides, such as global options arch=xxx. */
30488 arm_option_override_internal (opts, opts_set);
30489
30490 return build_target_option_node (opts);
30491 }
30492
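/* Add a target("MODE") attribute to the front of *ATTRIBUTES.  */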
30493 static void
30494 add_attribute (const char * mode, tree *attributes)
30495 {
30496 size_t len = strlen (mode);
30497 tree value = build_string (len, mode);
30498
30499 TREE_TYPE (value) = build_array_type (char_type_node,
30500 build_index_type (size_int (len)));
30501
30502 *attributes = tree_cons (get_identifier ("target"),
30503 build_tree_list (NULL_TREE, value),
30504 *attributes);
30505 }
30506
30507 /* For testing. Insert thumb or arm mode alternately on functions. */
30508
30509 static void
30510 arm_insert_attributes (tree fndecl, tree * attributes)
30511 {
30512 const char *mode;
30513
30514 if (! TARGET_FLIP_THUMB)
30515 return;
30516
30517 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30518 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30519 return;
30520
30521 /* Nested definitions must inherit mode. */
30522 if (current_function_decl)
30523 {
30524 mode = TARGET_THUMB ? "thumb" : "arm";
30525 add_attribute (mode, attributes);
30526 return;
30527 }
30528
30529 /* If there is already a setting don't change it. */
30530 if (lookup_attribute ("target", *attributes) != NULL)
30531 return;
30532
30533 mode = thumb_flipper ? "thumb" : "arm";
30534 add_attribute (mode, attributes);
30535
30536 thumb_flipper = !thumb_flipper;
30537 }
30538
30539 /* Hook to validate attribute((target("string"))). */
30540
30541 static bool
30542 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30543 tree args, int ARG_UNUSED (flags))
30544 {
30545 bool ret = true;
30546 struct gcc_options func_options;
30547 tree cur_tree, new_optimize;
30548 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30549
30550 /* Get the optimization options of the current function. */
30551 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30552
30553 /* If the function changed the optimization levels as well as setting target
30554 options, start with the optimizations specified. */
30555 if (!func_optimize)
30556 func_optimize = optimization_default_node;
30557
30558 /* Init func_options. */
30559 memset (&func_options, 0, sizeof (func_options));
30560 init_options_struct (&func_options, NULL);
30561 lang_hooks.init_options_struct (&func_options);
30562
30563 /* Initialize func_options to the defaults. */
30564 cl_optimization_restore (&func_options,
30565 TREE_OPTIMIZATION (func_optimize));
30566
30567 cl_target_option_restore (&func_options,
30568 TREE_TARGET_OPTION (target_option_default_node));
30569
30570 /* Set func_options flags with new target mode. */
30571 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30572 &global_options_set);
30573
30574 if (cur_tree == NULL_TREE)
30575 ret = false;
30576
30577 new_optimize = build_optimization_node (&func_options);
30578
30579 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30580
30581 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30582
30583 finalize_options_struct (&func_options);
30584
30585 return ret;
30586 }
30587
30588 /* Match an ISA feature bitmap to a named FPU. We always use the
30589 first entry that exactly matches the feature set, so that we
30590 effectively canonicalize the FPU name for the assembler. */
30591 static const char*
30592 arm_identify_fpu_from_isa (sbitmap isa)
30593 {
30594 auto_sbitmap fpubits (isa_num_bits);
30595 auto_sbitmap cand_fpubits (isa_num_bits);
30596
30597 bitmap_and (fpubits, isa, isa_all_fpubits);
30598
30599 /* If there are no ISA feature bits relating to the FPU, we must be
30600 doing soft-float. */
30601 if (bitmap_empty_p (fpubits))
30602 return "softvfp";
30603
30604 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30605 {
30606 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30607 if (bitmap_equal_p (fpubits, cand_fpubits))
30608 return all_fpus[i].name;
30609 }
30610 /* We must find an entry, or things have gone wrong. */
30611 gcc_unreachable ();
30612 }
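
/* For example (illustrative, not from the original source): if the active
   target's FPU-related ISA bits exactly match the all_fpus entry named
   "vfpv3-d16", this returns "vfpv3-d16"; if no FPU bits are set at all it
   returns "softvfp", so the assembler always sees a valid FPU name.  */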
30613
30614 void
30615 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30616 {
30617
30618 fprintf (stream, "\t.syntax unified\n");
30619
30620 if (TARGET_THUMB)
30621 {
30622 if (is_called_in_ARM_mode (decl)
30623 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30624 && cfun->is_thunk))
30625 fprintf (stream, "\t.code 32\n");
30626 else if (TARGET_THUMB1)
30627 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30628 else
30629 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30630 }
30631 else
30632 fprintf (stream, "\t.arm\n");
30633
30634 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30635 (TARGET_SOFT_FLOAT
30636 ? "softvfp"
30637 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30638
30639 if (TARGET_POKE_FUNCTION_NAME)
30640 arm_poke_function_name (stream, (const char *) name);
30641 }
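
/* Illustrative output (not part of the original source): for a Thumb-2
   function built with a VFPv3-D16 FPU this hook emits directives along the
   lines of

     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv3-d16

   whereas an ARM-state soft-float function gets ".arm" and ".fpu softvfp"
   instead.  */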
30642
30643 /* If the address of MEM is of the form [base+offset], extract the two
30644 parts of the address and store them in BASE and OFFSET; otherwise
30645 return false after clearing BASE and OFFSET.  */
30646
30647 static bool
30648 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30649 {
30650 rtx addr;
30651
30652 gcc_assert (MEM_P (mem));
30653
30654 addr = XEXP (mem, 0);
30655
30656 /* Strip off const from addresses like (const (addr)). */
30657 if (GET_CODE (addr) == CONST)
30658 addr = XEXP (addr, 0);
30659
30660 if (GET_CODE (addr) == REG)
30661 {
30662 *base = addr;
30663 *offset = const0_rtx;
30664 return true;
30665 }
30666
30667 if (GET_CODE (addr) == PLUS
30668 && GET_CODE (XEXP (addr, 0)) == REG
30669 && CONST_INT_P (XEXP (addr, 1)))
30670 {
30671 *base = XEXP (addr, 0);
30672 *offset = XEXP (addr, 1);
30673 return true;
30674 }
30675
30676 *base = NULL_RTX;
30677 *offset = NULL_RTX;
30678
30679 return false;
30680 }
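
/* Illustrative examples (not part of the original source):

     (mem (reg r1))                       -> *base = r1, *offset = 0
     (mem (plus (reg r1) (const_int 8)))  -> *base = r1, *offset = 8
     (mem (plus (reg r1) (reg r2)))       -> false; BASE and OFFSET cleared

   Only a bare register or a register plus a constant offset is
   recognized.  */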
30681
30682 /* If INSN is a load or store whose address has the form [base+offset],
30683 extract the two parts and store them in BASE and OFFSET.  Set IS_LOAD
30684 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
30685 otherwise return FALSE.  */
30686
30687 static bool
30688 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30689 {
30690 rtx x, dest, src;
30691
30692 gcc_assert (INSN_P (insn));
30693 x = PATTERN (insn);
30694 if (GET_CODE (x) != SET)
30695 return false;
30696
30697 src = SET_SRC (x);
30698 dest = SET_DEST (x);
30699 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30700 {
30701 *is_load = false;
30702 extract_base_offset_in_addr (dest, base, offset);
30703 }
30704 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30705 {
30706 *is_load = true;
30707 extract_base_offset_in_addr (src, base, offset);
30708 }
30709 else
30710 return false;
30711
30712 return (*base != NULL_RTX && *offset != NULL_RTX);
30713 }
30714
30715 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30716
30717 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30718 and PRI are only calculated for these instructions.  For other instructions,
30719 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
30720 instruction fusion can be supported by returning different priorities.
30721
30722 It's important that irrelevant instructions get the largest FUSION_PRI. */
30723
30724 static void
30725 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30726 int *fusion_pri, int *pri)
30727 {
30728 int tmp, off_val;
30729 bool is_load;
30730 rtx base, offset;
30731
30732 gcc_assert (INSN_P (insn));
30733
30734 tmp = max_pri - 1;
30735 if (!fusion_load_store (insn, &base, &offset, &is_load))
30736 {
30737 *pri = tmp;
30738 *fusion_pri = tmp;
30739 return;
30740 }
30741
30742 /* Load goes first. */
30743 if (is_load)
30744 *fusion_pri = tmp - 1;
30745 else
30746 *fusion_pri = tmp - 2;
30747
30748 tmp /= 2;
30749
30750 /* INSN with smaller base register goes first. */
30751 tmp -= ((REGNO (base) & 0xff) << 20);
30752
30753 /* INSN with smaller offset goes first. */
30754 off_val = (int)(INTVAL (offset));
30755 if (off_val >= 0)
30756 tmp -= (off_val & 0xfffff);
30757 else
30758 tmp += ((- off_val) & 0xfffff);
30759
30760 *pri = tmp;
30761 return;
30762 }
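
/* Worked example (illustrative, not from the original source): assuming
   MAX_PRI == 32768, a load from [r2, #4] is assigned

     tmp         = 32767
     *fusion_pri = 32766   (loads sort ahead of stores, which get 32765)
     tmp         = 16383
     tmp        -= 2 << 20 (base register number, r2)
     tmp        -= 4       (offset)
     *pri        = tmp

   so between two fusible loads, the one with the smaller base register
   number, and then the smaller offset, receives the larger priority.  */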
30763
30764
30765 /* Construct and return a PARALLEL RTX vector with elements numbering the
30766 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30767 the vector - from the perspective of the architecture. This does not
30768 line up with GCC's perspective on lane numbers, so we end up with
30769 different masks depending on our target endian-ness. The diagram
30770 below may help. We must draw the distinction when building masks
30771 which select one half of the vector. An instruction selecting
30772 architectural low-lanes for a big-endian target must be described using
30773 a mask selecting GCC high-lanes.
30774
30775 Big-Endian Little-Endian
30776
30777 GCC 0 1 2 3 3 2 1 0
30778 | x | x | x | x | | x | x | x | x |
30779 Architecture 3 2 1 0 3 2 1 0
30780
30781 Low Mask: { 2, 3 } { 0, 1 }
30782 High Mask: { 0, 1 } { 2, 3 }
30783 */
30784
30785 rtx
30786 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30787 {
30788 int nunits = GET_MODE_NUNITS (mode);
30789 rtvec v = rtvec_alloc (nunits / 2);
30790 int high_base = nunits / 2;
30791 int low_base = 0;
30792 int base;
30793 rtx t1;
30794 int i;
30795
30796 if (BYTES_BIG_ENDIAN)
30797 base = high ? low_base : high_base;
30798 else
30799 base = high ? high_base : low_base;
30800
30801 for (i = 0; i < nunits / 2; i++)
30802 RTVEC_ELT (v, i) = GEN_INT (base + i);
30803
30804 t1 = gen_rtx_PARALLEL (mode, v);
30805 return t1;
30806 }
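
/* Illustrative example (not part of the original source): for V4SImode on a
   little-endian target this returns

     high == true  -> (parallel [(const_int 2) (const_int 3)])
     high == false -> (parallel [(const_int 0) (const_int 1)])

   and on a big-endian target the two masks are swapped, matching the
   Low/High Mask table in the comment above.  */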
30807
30808 /* Check OP for validity as a PARALLEL RTX vector with elements
30809 numbering the lanes of either the high (HIGH == TRUE) or low
30810 (HIGH == FALSE) half of the vector, from the perspective of the
30811 architecture.  See the diagram above arm_simd_vect_par_cnst_half for details.  */
30812
30813 bool
30814 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30815 bool high)
30816 {
30817 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30818 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30819 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30820 int i = 0;
30821
30822 if (!VECTOR_MODE_P (mode))
30823 return false;
30824
30825 if (count_op != count_ideal)
30826 return false;
30827
30828 for (i = 0; i < count_ideal; i++)
30829 {
30830 rtx elt_op = XVECEXP (op, 0, i);
30831 rtx elt_ideal = XVECEXP (ideal, 0, i);
30832
30833 if (!CONST_INT_P (elt_op)
30834 || INTVAL (elt_ideal) != INTVAL (elt_op))
30835 return false;
30836 }
30837 return true;
30838 }
30839
30840 /* We can output an mi_thunk for all cases except when VCALL_OFFSET is
30841 non-zero in Thumb-1.  */
30842 static bool
30843 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30844 const_tree)
30845 {
30846 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
30847 if (vcall_offset && TARGET_THUMB1)
30848 return false;
30849
30850 /* Otherwise ok. */
30851 return true;
30852 }
30853
30854 /* Generate RTL for a conditional branch with rtx comparison CODE in
30855 mode CC_MODE. The destination of the unlikely conditional branch
30856 is LABEL_REF. */
30857
30858 void
30859 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30860 rtx label_ref)
30861 {
30862 rtx x;
30863 x = gen_rtx_fmt_ee (code, VOIDmode,
30864 gen_rtx_REG (cc_mode, CC_REGNUM),
30865 const0_rtx);
30866
30867 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30868 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30869 pc_rtx);
30870 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30871 }
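
/* Illustrative example (not part of the original source): for CODE == NE
   and CC_MODE == CCmode this emits RTL of the form

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF)
                             (pc)))

   annotated with a very low branch probability, so the fall-through path
   is treated as the hot one.  */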
30872
30873 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30874
30875 For pure-code sections there is no letter code for this attribute, so
30876 output all the section flags numerically when this is needed. */
30877
30878 static bool
30879 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30880 {
30881
30882 if (flags & SECTION_ARM_PURECODE)
30883 {
30884 *num = 0x20000000;
30885
30886 if (!(flags & SECTION_DEBUG))
30887 *num |= 0x2;
30888 if (flags & SECTION_EXCLUDE)
30889 *num |= 0x80000000;
30890 if (flags & SECTION_WRITE)
30891 *num |= 0x1;
30892 if (flags & SECTION_CODE)
30893 *num |= 0x4;
30894 if (flags & SECTION_MERGE)
30895 *num |= 0x10;
30896 if (flags & SECTION_STRINGS)
30897 *num |= 0x20;
30898 if (flags & SECTION_TLS)
30899 *num |= 0x400;
30900 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30901 *num |= 0x200;
30902
30903 return true;
30904 }
30905
30906 return false;
30907 }
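
/* Worked example (illustrative, not from the original source): a pure-code
   executable section with SECTION_CODE set and SECTION_DEBUG clear yields

     0x20000000 | 0x2 | 0x4 == 0x20000006

   i.e. SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR, which is then printed
   numerically in the .section directive instead of as a letter string.  */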
30908
30909 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30910
30911 If pure-code is passed as an option, make sure all functions are in
30912 sections that have the SHF_ARM_PURECODE attribute. */
30913
30914 static section *
30915 arm_function_section (tree decl, enum node_frequency freq,
30916 bool startup, bool exit)
30917 {
30918 const char * section_name;
30919 section * sec;
30920
30921 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30922 return default_function_section (decl, freq, startup, exit);
30923
30924 if (!target_pure_code)
30925 return default_function_section (decl, freq, startup, exit);
30926
30927
30928 section_name = DECL_SECTION_NAME (decl);
30929
30930 /* If a function is not in a named section then it falls under the 'default'
30931 text section, also known as '.text'. We can preserve previous behavior as
30932 the default text section already has the SHF_ARM_PURECODE section
30933 attribute. */
30934 if (!section_name)
30935 {
30936 section *default_sec = default_function_section (decl, freq, startup,
30937 exit);
30938
30939 /* If default_sec is not null, then it must be a special section like for
30940 example .text.startup. We set the pure-code attribute and return the
30941 same section to preserve existing behavior. */
30942 if (default_sec)
30943 default_sec->common.flags |= SECTION_ARM_PURECODE;
30944 return default_sec;
30945 }
30946
30947 /* Otherwise look whether a section has already been created with
30948 'section_name'. */
30949 sec = get_named_section (decl, section_name, 0);
30950 if (!sec)
30951 /* If that is not the case passing NULL as the section's name to
30952 'get_named_section' will create a section with the declaration's
30953 section name. */
30954 sec = get_named_section (decl, NULL, 0);
30955
30956 /* Set the SHF_ARM_PURECODE attribute. */
30957 sec->common.flags |= SECTION_ARM_PURECODE;
30958
30959 return sec;
30960 }
30961
30962 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
30963
30964 If DECL is a function declaration and pure-code is passed as an option
30965 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
30966 section's name and RELOC indicates whether the declaration's initializer may
30967 contain runtime relocations.  */
30968
30969 static unsigned int
30970 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30971 {
30972 unsigned int flags = default_section_type_flags (decl, name, reloc);
30973
30974 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
30975 flags |= SECTION_ARM_PURECODE;
30976
30977 return flags;
30978 }
30979
30980 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
30981
30982 static void
30983 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
30984 rtx op0, rtx op1,
30985 rtx *quot_p, rtx *rem_p)
30986 {
30987 if (mode == SImode)
30988 gcc_assert (!TARGET_IDIV);
30989
30990 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
30991 MODE_INT);
30992
30993 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
30994 libval_mode, 2,
30995 op0, GET_MODE (op0),
30996 op1, GET_MODE (op1));
30997
30998 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
30999 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31000 GET_MODE_SIZE (mode));
31001
31002 gcc_assert (quotient);
31003 gcc_assert (remainder);
31004
31005 *quot_p = quotient;
31006 *rem_p = remainder;
31007 }
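
/* Illustrative example (not part of the original source): for MODE == SImode
   the libcall is __aeabi_idivmod or __aeabi_uidivmod, LIBVAL_MODE is DImode,
   and the two results are recovered as

     quotient  = subreg:SI of the libcall value at byte offset 0
     remainder = subreg:SI of the libcall value at byte offset 4

   matching the AEABI convention of returning quotient and remainder as a
   register pair.  */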
31008
31009 /* This function checks for the availability of the coprocessor builtin passed
31010 in BUILTIN for the current target.  Returns true if it is available and
31011 false otherwise.  If a BUILTIN is passed for which this function has not
31012 been implemented it will trigger an internal compiler error.  */
31013
31014 bool
31015 arm_coproc_builtin_available (enum unspecv builtin)
31016 {
31017 /* None of these builtins are available in Thumb mode if the target only
31018 supports Thumb-1. */
31019 if (TARGET_THUMB1)
31020 return false;
31021
31022 switch (builtin)
31023 {
31024 case VUNSPEC_CDP:
31025 case VUNSPEC_LDC:
31026 case VUNSPEC_LDCL:
31027 case VUNSPEC_STC:
31028 case VUNSPEC_STCL:
31029 case VUNSPEC_MCR:
31030 case VUNSPEC_MRC:
31031 if (arm_arch4)
31032 return true;
31033 break;
31034 case VUNSPEC_CDP2:
31035 case VUNSPEC_LDC2:
31036 case VUNSPEC_LDC2L:
31037 case VUNSPEC_STC2:
31038 case VUNSPEC_STC2L:
31039 case VUNSPEC_MCR2:
31040 case VUNSPEC_MRC2:
31041 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31042 ARMv8-{A,M}. */
31043 if (arm_arch5)
31044 return true;
31045 break;
31046 case VUNSPEC_MCRR:
31047 case VUNSPEC_MRRC:
31048 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31049 ARMv8-{A,M}. */
31050 if (arm_arch6 || arm_arch5te)
31051 return true;
31052 break;
31053 case VUNSPEC_MCRR2:
31054 case VUNSPEC_MRRC2:
31055 if (arm_arch6)
31056 return true;
31057 break;
31058 default:
31059 gcc_unreachable ();
31060 }
31061 return false;
31062 }
31063
31064 /* This function returns true if OP is a valid memory operand for the ldc and
31065 stc coprocessor instructions and false otherwise. */
31066
31067 bool
31068 arm_coproc_ldc_stc_legitimate_address (rtx op)
31069 {
31070 HOST_WIDE_INT range;
31071 /* Has to be a memory operand. */
31072 if (!MEM_P (op))
31073 return false;
31074
31075 op = XEXP (op, 0);
31076
31077 /* We accept registers. */
31078 if (REG_P (op))
31079 return true;
31080
31081 switch (GET_CODE (op))
31082 {
31083 case PLUS:
31084 {
31085 /* Or registers with an offset. */
31086 if (!REG_P (XEXP (op, 0)))
31087 return false;
31088
31089 op = XEXP (op, 1);
31090
31091 /* The offset must be an immediate though. */
31092 if (!CONST_INT_P (op))
31093 return false;
31094
31095 range = INTVAL (op);
31096
31097 /* Within the range of [-1020,1020]. */
31098 if (!IN_RANGE (range, -1020, 1020))
31099 return false;
31100
31101 /* And a multiple of 4. */
31102 return (range % 4) == 0;
31103 }
31104 case PRE_INC:
31105 case POST_INC:
31106 case PRE_DEC:
31107 case POST_DEC:
31108 return REG_P (XEXP (op, 0));
31109 default:
31110 gcc_unreachable ();
31111 }
31112 return false;
31113 }
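
/* Illustrative examples (not part of the original source):

     (mem (reg r0))                           accepted
     (mem (plus (reg r1) (const_int 512)))    accepted (in range, multiple of 4)
     (mem (plus (reg r1) (const_int 514)))    rejected (not a multiple of 4)
     (mem (plus (reg r1) (const_int -1024)))  rejected (outside [-1020, 1020])
     (mem (post_inc (reg r2)))                accepted

   mirroring the addressing modes that the LDC/STC encodings can express.  */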
31114 #include "gt-arm.h"