1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
67
68 /* This file should be included last. */
69 #include "target-def.h"
70
71 /* Forward definitions of types. */
72 typedef struct minipool_node Mnode;
73 typedef struct minipool_fixup Mfix;
74
75 void (*arm_lang_output_object_attributes_hook)(void);
76
77 struct four_ints
78 {
79 int i[4];
80 };
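/* This is the return-sequence type used by optimal_immediate_sequence
   below: any 32-bit constant can be built from at most four of the
   ARM's rotated 8-bit immediates, and the four slots record the
   candidate immediates of one such sequence.  (Explanatory note; see
   arm_gen_constant below for the authoritative logic.)  */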
81
82 /* Forward function declarations. */
83 static bool arm_const_not_ok_for_debug_p (rtx);
84 static bool arm_needs_doubleword_align (machine_mode, const_tree);
85 static int arm_compute_static_chain_stack_bytes (void);
86 static arm_stack_offsets *arm_get_frame_offsets (void);
87 static void arm_add_gc_roots (void);
88 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
89 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
90 static unsigned bit_count (unsigned long);
91 static unsigned bitmap_popcount (const sbitmap);
92 static int arm_address_register_rtx_p (rtx, int);
93 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
94 static bool is_called_in_ARM_mode (tree);
95 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
96 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
97 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
98 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
99 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
100 inline static int thumb1_index_register_rtx_p (rtx, int);
101 static int thumb_far_jump_used_p (void);
102 static bool thumb_force_lr_save (void);
103 static unsigned arm_size_return_regs (void);
104 static bool arm_assemble_integer (rtx, unsigned int, int);
105 static void arm_print_operand (FILE *, rtx, int);
106 static void arm_print_operand_address (FILE *, machine_mode, rtx);
107 static bool arm_print_operand_punct_valid_p (unsigned char code);
108 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
109 static arm_cc get_arm_condition_code (rtx);
110 static const char *output_multi_immediate (rtx *, const char *, const char *,
111 int, HOST_WIDE_INT);
112 static const char *shift_op (rtx, HOST_WIDE_INT *);
113 static struct machine_function *arm_init_machine_status (void);
114 static void thumb_exit (FILE *, int);
115 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
116 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_forward_ref (Mfix *);
118 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
119 static Mnode *add_minipool_backward_ref (Mfix *);
120 static void assign_minipool_offsets (Mfix *);
121 static void arm_print_value (FILE *, rtx);
122 static void dump_minipool (rtx_insn *);
123 static int arm_barrier_cost (rtx_insn *);
124 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
125 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
126 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
127 machine_mode, rtx);
128 static void arm_reorg (void);
129 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
130 static unsigned long arm_compute_save_reg0_reg12_mask (void);
131 static unsigned long arm_compute_save_reg_mask (void);
132 static unsigned long arm_isr_value (tree);
133 static unsigned long arm_compute_func_type (void);
134 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
135 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
136 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
138 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
139 #endif
140 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
141 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
142 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
143 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
144 static int arm_comp_type_attributes (const_tree, const_tree);
145 static void arm_set_default_type_attributes (tree);
146 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
147 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
148 static int optimal_immediate_sequence (enum rtx_code code,
149 unsigned HOST_WIDE_INT val,
150 struct four_ints *return_sequence);
151 static int optimal_immediate_sequence_1 (enum rtx_code code,
152 unsigned HOST_WIDE_INT val,
153 struct four_ints *return_sequence,
154 int i);
155 static int arm_get_strip_length (int);
156 static bool arm_function_ok_for_sibcall (tree, tree);
157 static machine_mode arm_promote_function_mode (const_tree,
158 machine_mode, int *,
159 const_tree, int);
160 static bool arm_return_in_memory (const_tree, const_tree);
161 static rtx arm_function_value (const_tree, const_tree, bool);
162 static rtx arm_libcall_value_1 (machine_mode);
163 static rtx arm_libcall_value (machine_mode, const_rtx);
164 static bool arm_function_value_regno_p (const unsigned int);
165 static void arm_internal_label (FILE *, const char *, unsigned long);
166 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
167 tree);
168 static bool arm_have_conditional_execution (void);
169 static bool arm_cannot_force_const_mem (machine_mode, rtx);
170 static bool arm_legitimate_constant_p (machine_mode, rtx);
171 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
172 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx_insn *emit_set_insn (rtx, rtx);
177 static rtx emit_multi_reg_push (unsigned long, unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
179 tree, bool);
180 static rtx arm_function_arg (cumulative_args_t, machine_mode,
181 const_tree, bool);
182 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
185 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
186 const_tree);
187 static rtx aapcs_libcall_value (machine_mode);
188 static int aapcs_select_return_coproc (const_tree, const_tree);
189
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
192 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
193 #endif
194 #ifndef ARM_PE
195 static void arm_encode_section_info (tree, rtx, int);
196 #endif
197
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200 static void arm_insert_attributes (tree, tree *);
201
202 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
203 tree, int *, int);
204 static bool arm_pass_by_reference (cumulative_args_t,
205 machine_mode, const_tree, bool);
206 static bool arm_promote_prototypes (const_tree);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree);
210 static bool arm_must_pass_in_stack (machine_mode, const_tree);
211 static bool arm_return_in_memory (const_tree, const_tree);
212 #if ARM_UNWIND_INFO
213 static void arm_unwind_emit (FILE *, rtx_insn *);
214 static bool arm_output_ttype (rtx);
215 static void arm_asm_emit_except_personality (rtx);
216 #endif
217 static void arm_asm_init_sections (void);
218 static rtx arm_dwarf_register_span (rtx);
219
220 static tree arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree arm_get_cookie_size (tree);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree, rtx);
232 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
233 static void arm_option_override (void);
234 static void arm_option_restore (struct gcc_options *,
235 struct cl_target_option *);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn *);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
249 static bool arm_output_addr_const_extra (FILE *, rtx);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree);
252 static tree arm_promoted_type (const_tree t);
253 static bool arm_scalar_mode_supported_p (machine_mode);
254 static bool arm_frame_pointer_required (void);
255 static bool arm_can_eliminate (const int, const int);
256 static void arm_asm_trampoline_template (FILE *);
257 static void arm_trampoline_init (rtx, tree, rtx);
258 static rtx arm_trampoline_adjust_address (rtx);
259 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
260 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
261 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
262 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool arm_array_mode_supported_p (machine_mode,
264 unsigned HOST_WIDE_INT);
265 static machine_mode arm_preferred_simd_mode (machine_mode);
266 static bool arm_class_likely_spilled_p (reg_class_t);
267 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
268 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
269 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
270 const_tree type,
271 int misalignment,
272 bool is_packed);
273 static void arm_conditional_register_usage (void);
274 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
275 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
276 static unsigned int arm_autovectorize_vector_sizes (void);
277 static int arm_default_branch_cost (bool, bool);
278 static int arm_cortex_a5_branch_cost (bool, bool);
279 static int arm_cortex_m_branch_cost (bool, bool);
280 static int arm_cortex_m7_branch_cost (bool, bool);
281
282 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
283 const unsigned char *sel);
284
285 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
286
287 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
288 tree vectype,
289 int misalign ATTRIBUTE_UNUSED);
290 static unsigned arm_add_stmt_cost (void *data, int count,
291 enum vect_cost_for_stmt kind,
292 struct _stmt_vec_info *stmt_info,
293 int misalign,
294 enum vect_cost_model_location where);
295
296 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
297 bool op0_preserve_value);
298 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
299
300 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
301 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
302 const_tree);
303 static section *arm_function_section (tree, enum node_frequency, bool, bool);
304 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
305 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
306 int reloc);
307 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
308 static machine_mode arm_floatn_mode (int, bool);
309 \f
310 /* Table of machine attributes. */
311 static const struct attribute_spec arm_attribute_table[] =
312 {
313 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
314 affects_type_identity } */
315 /* Function calls made to this symbol must be done indirectly, because
316 it may lie outside of the 26 bit addressing range of a normal function
317 call. */
318 { "long_call", 0, 0, false, true, true, NULL, false },
319 /* Whereas these functions are always known to reside within the 26 bit
320 addressing range. */
321 { "short_call", 0, 0, false, true, true, NULL, false },
322 /* Specify the procedure call conventions for a function. */
323 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
324 false },
325 /* Interrupt Service Routines have special prologue and epilogue requirements. */
326 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
327 false },
328 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
329 false },
330 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
331 false },
332 #ifdef ARM_PE
333 /* ARM/PE has three new attributes:
334 interfacearm - ?
335 dllexport - for exporting a function/variable that will live in a dll
336 dllimport - for importing a function/variable from a dll
337
338 Microsoft allows multiple declspecs in one __declspec, separating
339 them with spaces. We do NOT support this. Instead, use __declspec
340 multiple times.
341 */
342 { "dllimport", 0, 0, true, false, false, NULL, false },
343 { "dllexport", 0, 0, true, false, false, NULL, false },
344 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
345 false },
346 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
347 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
348 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
349 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
350 false },
351 #endif
352 /* ARMv8-M Security Extensions support. */
353 { "cmse_nonsecure_entry", 0, 0, true, false, false,
354 arm_handle_cmse_nonsecure_entry, false },
355 { "cmse_nonsecure_call", 0, 0, true, false, false,
356 arm_handle_cmse_nonsecure_call, true },
357 { NULL, 0, 0, false, false, false, NULL, false }
358 };
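/* Example uses of the attributes in the table above (hypothetical user
   declarations, shown only for illustration; they are not part of this
   file):

     void far_helper (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_add (double, double) __attribute__ ((pcs ("aapcs-vfp")));
     void secure_gateway (void) __attribute__ ((cmse_nonsecure_entry));

   long_call/short_call choose the call sequence, "interrupt"/"isr"
   request the ISR prologue/epilogue handled by arm_handle_isr_attribute,
   "pcs" selects the procedure-call-standard variant, and
   cmse_nonsecure_entry marks an ARMv8-M secure entry point.  */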
359 \f
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
365
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
368
369 #undef TARGET_ATTRIBUTE_TABLE
370 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
371
372 #undef TARGET_INSERT_ATTRIBUTES
373 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
374
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
379
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
384
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
391
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
394
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
397
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
400
401 #undef TARGET_CAN_INLINE_P
402 #define TARGET_CAN_INLINE_P arm_can_inline_p
403
404 #undef TARGET_RELAYOUT_FUNCTION
405 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
406
407 #undef TARGET_OPTION_OVERRIDE
408 #define TARGET_OPTION_OVERRIDE arm_option_override
409
410 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
411 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
412
413 #undef TARGET_OPTION_RESTORE
414 #define TARGET_OPTION_RESTORE arm_option_restore
415
416 #undef TARGET_OPTION_PRINT
417 #define TARGET_OPTION_PRINT arm_option_print
418
419 #undef TARGET_COMP_TYPE_ATTRIBUTES
420 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
421
422 #undef TARGET_SCHED_MACRO_FUSION_P
423 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
424
425 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
426 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
427
428 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
429 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
430
431 #undef TARGET_SCHED_ADJUST_COST
432 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
433
434 #undef TARGET_SET_CURRENT_FUNCTION
435 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
436
437 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
438 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
439
440 #undef TARGET_SCHED_REORDER
441 #define TARGET_SCHED_REORDER arm_sched_reorder
442
443 #undef TARGET_REGISTER_MOVE_COST
444 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
445
446 #undef TARGET_MEMORY_MOVE_COST
447 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
448
449 #undef TARGET_ENCODE_SECTION_INFO
450 #ifdef ARM_PE
451 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
452 #else
453 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
454 #endif
455
456 #undef TARGET_STRIP_NAME_ENCODING
457 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
458
459 #undef TARGET_ASM_INTERNAL_LABEL
460 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
461
462 #undef TARGET_FLOATN_MODE
463 #define TARGET_FLOATN_MODE arm_floatn_mode
464
465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
466 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
467
468 #undef TARGET_FUNCTION_VALUE
469 #define TARGET_FUNCTION_VALUE arm_function_value
470
471 #undef TARGET_LIBCALL_VALUE
472 #define TARGET_LIBCALL_VALUE arm_libcall_value
473
474 #undef TARGET_FUNCTION_VALUE_REGNO_P
475 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
476
477 #undef TARGET_ASM_OUTPUT_MI_THUNK
478 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
479 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
480 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
481
482 #undef TARGET_RTX_COSTS
483 #define TARGET_RTX_COSTS arm_rtx_costs
484 #undef TARGET_ADDRESS_COST
485 #define TARGET_ADDRESS_COST arm_address_cost
486
487 #undef TARGET_SHIFT_TRUNCATION_MASK
488 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
489 #undef TARGET_VECTOR_MODE_SUPPORTED_P
490 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
491 #undef TARGET_ARRAY_MODE_SUPPORTED_P
492 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
493 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
494 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
495 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
496 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
497 arm_autovectorize_vector_sizes
498
499 #undef TARGET_MACHINE_DEPENDENT_REORG
500 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
501
502 #undef TARGET_INIT_BUILTINS
503 #define TARGET_INIT_BUILTINS arm_init_builtins
504 #undef TARGET_EXPAND_BUILTIN
505 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
506 #undef TARGET_BUILTIN_DECL
507 #define TARGET_BUILTIN_DECL arm_builtin_decl
508
509 #undef TARGET_INIT_LIBFUNCS
510 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
511
512 #undef TARGET_PROMOTE_FUNCTION_MODE
513 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
514 #undef TARGET_PROMOTE_PROTOTYPES
515 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
516 #undef TARGET_PASS_BY_REFERENCE
517 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
518 #undef TARGET_ARG_PARTIAL_BYTES
519 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
520 #undef TARGET_FUNCTION_ARG
521 #define TARGET_FUNCTION_ARG arm_function_arg
522 #undef TARGET_FUNCTION_ARG_ADVANCE
523 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
524 #undef TARGET_FUNCTION_ARG_BOUNDARY
525 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
526
527 #undef TARGET_SETUP_INCOMING_VARARGS
528 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
529
530 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
531 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
532
533 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
534 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
535 #undef TARGET_TRAMPOLINE_INIT
536 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
537 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
538 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
539
540 #undef TARGET_WARN_FUNC_RETURN
541 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
542
543 #undef TARGET_DEFAULT_SHORT_ENUMS
544 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
545
546 #undef TARGET_ALIGN_ANON_BITFIELD
547 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
548
549 #undef TARGET_NARROW_VOLATILE_BITFIELD
550 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
551
552 #undef TARGET_CXX_GUARD_TYPE
553 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
554
555 #undef TARGET_CXX_GUARD_MASK_BIT
556 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
557
558 #undef TARGET_CXX_GET_COOKIE_SIZE
559 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
560
561 #undef TARGET_CXX_COOKIE_HAS_SIZE
562 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
563
564 #undef TARGET_CXX_CDTOR_RETURNS_THIS
565 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
566
567 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
568 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
569
570 #undef TARGET_CXX_USE_AEABI_ATEXIT
571 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
572
573 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
574 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
575 arm_cxx_determine_class_data_visibility
576
577 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
578 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
579
580 #undef TARGET_RETURN_IN_MSB
581 #define TARGET_RETURN_IN_MSB arm_return_in_msb
582
583 #undef TARGET_RETURN_IN_MEMORY
584 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
585
586 #undef TARGET_MUST_PASS_IN_STACK
587 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
588
589 #if ARM_UNWIND_INFO
590 #undef TARGET_ASM_UNWIND_EMIT
591 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
592
593 /* EABI unwinding tables use a different format for the typeinfo tables. */
594 #undef TARGET_ASM_TTYPE
595 #define TARGET_ASM_TTYPE arm_output_ttype
596
597 #undef TARGET_ARM_EABI_UNWINDER
598 #define TARGET_ARM_EABI_UNWINDER true
599
600 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
601 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
602
603 #endif /* ARM_UNWIND_INFO */
604
605 #undef TARGET_ASM_INIT_SECTIONS
606 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
607
608 #undef TARGET_DWARF_REGISTER_SPAN
609 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
610
611 #undef TARGET_CANNOT_COPY_INSN_P
612 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
613
614 #ifdef HAVE_AS_TLS
615 #undef TARGET_HAVE_TLS
616 #define TARGET_HAVE_TLS true
617 #endif
618
619 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
620 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
621
622 #undef TARGET_LEGITIMATE_CONSTANT_P
623 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
624
625 #undef TARGET_CANNOT_FORCE_CONST_MEM
626 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
627
628 #undef TARGET_MAX_ANCHOR_OFFSET
629 #define TARGET_MAX_ANCHOR_OFFSET 4095
630
631 /* The minimum is set such that the total size of the block
632 for a particular anchor is -4088 + 1 + 4095 bytes, which is
633 divisible by eight, ensuring natural spacing of anchors. */
634 #undef TARGET_MIN_ANCHOR_OFFSET
635 #define TARGET_MIN_ANCHOR_OFFSET -4088
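/* Checking the arithmetic behind the comment above: the range of
   offsets reachable from one anchor spans
     TARGET_MAX_ANCHOR_OFFSET - TARGET_MIN_ANCHOR_OFFSET + 1
       = 4095 - (-4088) + 1 = 8184 bytes,
   and 8184 = 8 * 1023, so the block size is indeed divisible by eight
   and consecutive anchors keep a naturally aligned spacing.  */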
636
637 #undef TARGET_SCHED_ISSUE_RATE
638 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
639
640 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
641 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
642 arm_first_cycle_multipass_dfa_lookahead
643
644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
646 arm_first_cycle_multipass_dfa_lookahead_guard
647
648 #undef TARGET_MANGLE_TYPE
649 #define TARGET_MANGLE_TYPE arm_mangle_type
650
651 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
652 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
653
654 #undef TARGET_BUILD_BUILTIN_VA_LIST
655 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
656 #undef TARGET_EXPAND_BUILTIN_VA_START
657 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
658 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
659 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
660
661 #ifdef HAVE_AS_TLS
662 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
663 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
664 #endif
665
666 #undef TARGET_LEGITIMATE_ADDRESS_P
667 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
668
669 #undef TARGET_PREFERRED_RELOAD_CLASS
670 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
671
672 #undef TARGET_PROMOTED_TYPE
673 #define TARGET_PROMOTED_TYPE arm_promoted_type
674
675 #undef TARGET_SCALAR_MODE_SUPPORTED_P
676 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
677
678 #undef TARGET_FRAME_POINTER_REQUIRED
679 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
680
681 #undef TARGET_CAN_ELIMINATE
682 #define TARGET_CAN_ELIMINATE arm_can_eliminate
683
684 #undef TARGET_CONDITIONAL_REGISTER_USAGE
685 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
686
687 #undef TARGET_CLASS_LIKELY_SPILLED_P
688 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
689
690 #undef TARGET_VECTORIZE_BUILTINS
691 #define TARGET_VECTORIZE_BUILTINS
692
693 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
694 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
695 arm_builtin_vectorized_function
696
697 #undef TARGET_VECTOR_ALIGNMENT
698 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
699
700 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
701 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
702 arm_vector_alignment_reachable
703
704 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
705 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
706 arm_builtin_support_vector_misalignment
707
708 #undef TARGET_PREFERRED_RENAME_CLASS
709 #define TARGET_PREFERRED_RENAME_CLASS \
710 arm_preferred_rename_class
711
712 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
713 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
714 arm_vectorize_vec_perm_const_ok
715
716 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
717 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
718 arm_builtin_vectorization_cost
719 #undef TARGET_VECTORIZE_ADD_STMT_COST
720 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
721
722 #undef TARGET_CANONICALIZE_COMPARISON
723 #define TARGET_CANONICALIZE_COMPARISON \
724 arm_canonicalize_comparison
725
726 #undef TARGET_ASAN_SHADOW_OFFSET
727 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
728
729 #undef MAX_INSN_PER_IT_BLOCK
730 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
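/* ARMv8-A deprecates the more general IT forms, so when -mrestrict-it
   is in effect (arm_restrict_it) an IT block is limited to a single
   conditional instruction; otherwise up to four conditional insns may
   share one IT block.  */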
731
732 #undef TARGET_CAN_USE_DOLOOP_P
733 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
734
735 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
736 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
737
738 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
739 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
740
741 #undef TARGET_SCHED_FUSION_PRIORITY
742 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
743
744 #undef TARGET_ASM_FUNCTION_SECTION
745 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
746
747 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
748 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
749
750 #undef TARGET_SECTION_TYPE_FLAGS
751 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
752
753 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
754 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
755
756 #undef TARGET_C_EXCESS_PRECISION
757 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
758
759 struct gcc_target targetm = TARGET_INITIALIZER;
760 \f
761 /* Obstack for minipool constant handling. */
762 static struct obstack minipool_obstack;
763 static char * minipool_startobj;
764
765 /* The maximum number of insns skipped which
766 will be conditionalised if possible. */
767 static int max_insns_skipped = 5;
768
769 extern FILE * asm_out_file;
770
771 /* True if we are currently building a constant table. */
772 int making_const_table;
773
774 /* The processor for which instructions should be scheduled. */
775 enum processor_type arm_tune = TARGET_CPU_arm_none;
776
777 /* The current tuning set. */
778 const struct tune_params *current_tune;
779
780 /* Which floating point hardware to schedule for. */
781 int arm_fpu_attr;
782
783 /* Used for Thumb call_via trampolines. */
784 rtx thumb_call_via_label[14];
785 static int thumb_call_reg_needed;
786
787 /* The bits in this mask specify which instruction scheduling options should
788 be used. */
789 unsigned int tune_flags = 0;
790
791 /* The highest ARM architecture version supported by the
792 target. */
793 enum base_architecture arm_base_arch = BASE_ARCH_0;
794
795 /* Active target architecture and tuning. */
796
797 struct arm_build_target arm_active_target;
798
799 /* The following are used in the arm.md file as equivalents to bits
800 in the above two flag variables. */
801
802 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
803 int arm_arch3m = 0;
804
805 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
806 int arm_arch4 = 0;
807
808 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
809 int arm_arch4t = 0;
810
811 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
812 int arm_arch5 = 0;
813
814 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
815 int arm_arch5e = 0;
816
817 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
818 int arm_arch6 = 0;
819
820 /* Nonzero if this chip supports the ARM 6K extensions. */
821 int arm_arch6k = 0;
822
823 /* Nonzero if this chip supports the ARM 6KZ extensions. */
824 int arm_arch6kz = 0;
825
826 /* Nonzero if instructions present in ARMv6-M can be used. */
827 int arm_arch6m = 0;
828
829 /* Nonzero if this chip supports the ARM 7 extensions. */
830 int arm_arch7 = 0;
831
832 /* Nonzero if this chip supports the ARM 7ve extensions. */
833 int arm_arch7ve = 0;
834
835 /* Nonzero if instructions not present in the 'M' profile can be used. */
836 int arm_arch_notm = 0;
837
838 /* Nonzero if instructions present in ARMv7E-M can be used. */
839 int arm_arch7em = 0;
840
841 /* Nonzero if instructions present in ARMv8 can be used. */
842 int arm_arch8 = 0;
843
844 /* Nonzero if this chip supports the ARMv8.1 extensions. */
845 int arm_arch8_1 = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
848 int arm_arch8_2 = 0;
849
850 /* Nonzero if this chip supports the FP16 instructions extension of ARM
851 Architecture 8.2. */
852 int arm_fp16_inst = 0;
853
854 /* Nonzero if this chip can benefit from load scheduling. */
855 int arm_ld_sched = 0;
856
857 /* Nonzero if this chip is a StrongARM. */
858 int arm_tune_strongarm = 0;
859
860 /* Nonzero if this chip supports Intel Wireless MMX technology. */
861 int arm_arch_iwmmxt = 0;
862
863 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
864 int arm_arch_iwmmxt2 = 0;
865
866 /* Nonzero if this chip is an XScale. */
867 int arm_arch_xscale = 0;
868
869 /* Nonzero if tuning for XScale */
870 int arm_tune_xscale = 0;
871
872 /* Nonzero if we want to tune for stores that access the write-buffer.
873 This typically means an ARM6 or ARM7 with MMU or MPU. */
874 int arm_tune_wbuf = 0;
875
876 /* Nonzero if tuning for Cortex-A9. */
877 int arm_tune_cortex_a9 = 0;
878
879 /* Nonzero if we should define __THUMB_INTERWORK__ in the
880 preprocessor.
881 XXX This is a bit of a hack, it's intended to help work around
882 problems in GLD which doesn't understand that armv5t code is
883 interworking clean. */
884 int arm_cpp_interwork = 0;
885
886 /* Nonzero if chip supports Thumb 1. */
887 int arm_arch_thumb1;
888
889 /* Nonzero if chip supports Thumb 2. */
890 int arm_arch_thumb2;
891
892 /* Nonzero if chip supports integer division instruction. */
893 int arm_arch_arm_hwdiv;
894 int arm_arch_thumb_hwdiv;
895
896 /* Nonzero if chip disallows volatile memory access in IT block. */
897 int arm_arch_no_volatile_ce;
898
899 /* Nonzero if we should use Neon to handle 64-bits operations rather
900 than core registers. */
901 int prefer_neon_for_64bits = 0;
902
903 /* Nonzero if we shouldn't use literal pools. */
904 bool arm_disable_literal_pool = false;
905
906 /* The register number to be used for the PIC offset register. */
907 unsigned arm_pic_register = INVALID_REGNUM;
908
909 enum arm_pcs arm_pcs_default;
910
911 /* For an explanation of these variables, see final_prescan_insn below. */
912 int arm_ccfsm_state;
913 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
914 enum arm_cond_code arm_current_cc;
915
916 rtx arm_target_insn;
917 int arm_target_label;
918 /* The number of conditionally executed insns, including the current insn. */
919 int arm_condexec_count = 0;
920 /* A bitmask specifying the patterns for the IT block.
921 Zero means do not output an IT block before this insn. */
922 int arm_condexec_mask = 0;
923 /* The number of bits used in arm_condexec_mask. */
924 int arm_condexec_masklen = 0;
925
926 /* Nonzero if chip supports the ARMv8 CRC instructions. */
927 int arm_arch_crc = 0;
928
929 /* Nonzero if chip supports the ARMv8-M security extensions. */
930 int arm_arch_cmse = 0;
931
932 /* Nonzero if the core has a very small, high-latency, multiply unit. */
933 int arm_m_profile_small_mul = 0;
934
935 /* The condition codes of the ARM, and the inverse function. */
936 static const char * const arm_condition_codes[] =
937 {
938 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
939 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
940 };
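/* The entries above are laid out in complementary pairs (eq/ne, cs/cc,
   mi/pl, vs/vc, hi/ls, ge/lt, gt/le, al/nv), so inverting a condition
   is just a matter of flipping the low bit of its index, which is what
   ARM_INVERSE_CONDITION_CODE in arm.h relies on (essentially (X) ^ 1).  */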
941
942 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
943 int arm_regs_in_sequence[] =
944 {
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
946 };
947
948 #define ARM_LSL_NAME "lsl"
949 #define streq(string1, string2) (strcmp (string1, string2) == 0)
950
951 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
952 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
953 | (1 << PIC_OFFSET_TABLE_REGNUM)))
954 \f
955 /* Initialization code. */
956
957 struct processors
958 {
959 const char *const name;
960 enum processor_type core;
961 unsigned int tune_flags;
962 const char *arch;
963 enum base_architecture base_arch;
964 enum isa_feature isa_bits[isa_num_bits];
965 const struct tune_params *const tune;
966 };
967
968
969 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
970 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
971 { \
972 num_slots, \
973 l1_size, \
974 l1_line_size \
975 }
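/* For instance, a tuning that finds prefetching worthwhile might use
   (hypothetical parameter values, purely for illustration):

     ARM_PREFETCH_BENEFICIAL (4, 32, 32)

   which expands to the initializer { 4, 32, 32 }, i.e. four prefetch
   slots, an L1 cache size parameter of 32 and a 32-byte cache line,
   matching the num_slots / l1_size / l1_line_size fields, while
   ARM_PREFETCH_NOT_BENEFICIAL yields { 0, -1, -1 }.  */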
976
977 /* arm generic vectorizer costs. */
978 static const
979 struct cpu_vec_costs arm_default_vec_cost = {
980 1, /* scalar_stmt_cost. */
981 1, /* scalar load_cost. */
982 1, /* scalar_store_cost. */
983 1, /* vec_stmt_cost. */
984 1, /* vec_to_scalar_cost. */
985 1, /* scalar_to_vec_cost. */
986 1, /* vec_align_load_cost. */
987 1, /* vec_unalign_load_cost. */
988 1, /* vec_unalign_store_cost. */
989 1, /* vec_store_cost. */
990 3, /* cond_taken_branch_cost. */
991 1, /* cond_not_taken_branch_cost. */
992 };
993
994 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
995 #include "aarch-cost-tables.h"
996
997
998
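/* The per-core tables below record extra costs on top of a baseline
   instruction and are expressed with COSTS_N_INSNS from rtl.h, which
   is essentially

     #define COSTS_N_INSNS(N) ((N) * 4)

   so a 0 entry means "no cost beyond the base instruction", not that
   the operation is free.  */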
999 const struct cpu_cost_table cortexa9_extra_costs =
1000 {
1001 /* ALU */
1002 {
1003 0, /* arith. */
1004 0, /* logical. */
1005 0, /* shift. */
1006 COSTS_N_INSNS (1), /* shift_reg. */
1007 COSTS_N_INSNS (1), /* arith_shift. */
1008 COSTS_N_INSNS (2), /* arith_shift_reg. */
1009 0, /* log_shift. */
1010 COSTS_N_INSNS (1), /* log_shift_reg. */
1011 COSTS_N_INSNS (1), /* extend. */
1012 COSTS_N_INSNS (2), /* extend_arith. */
1013 COSTS_N_INSNS (1), /* bfi. */
1014 COSTS_N_INSNS (1), /* bfx. */
1015 0, /* clz. */
1016 0, /* rev. */
1017 0, /* non_exec. */
1018 true /* non_exec_costs_exec. */
1019 },
1020 {
1021 /* MULT SImode */
1022 {
1023 COSTS_N_INSNS (3), /* simple. */
1024 COSTS_N_INSNS (3), /* flag_setting. */
1025 COSTS_N_INSNS (2), /* extend. */
1026 COSTS_N_INSNS (3), /* add. */
1027 COSTS_N_INSNS (2), /* extend_add. */
1028 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1029 },
1030 /* MULT DImode */
1031 {
1032 0, /* simple (N/A). */
1033 0, /* flag_setting (N/A). */
1034 COSTS_N_INSNS (4), /* extend. */
1035 0, /* add (N/A). */
1036 COSTS_N_INSNS (4), /* extend_add. */
1037 0 /* idiv (N/A). */
1038 }
1039 },
1040 /* LD/ST */
1041 {
1042 COSTS_N_INSNS (2), /* load. */
1043 COSTS_N_INSNS (2), /* load_sign_extend. */
1044 COSTS_N_INSNS (2), /* ldrd. */
1045 COSTS_N_INSNS (2), /* ldm_1st. */
1046 1, /* ldm_regs_per_insn_1st. */
1047 2, /* ldm_regs_per_insn_subsequent. */
1048 COSTS_N_INSNS (5), /* loadf. */
1049 COSTS_N_INSNS (5), /* loadd. */
1050 COSTS_N_INSNS (1), /* load_unaligned. */
1051 COSTS_N_INSNS (2), /* store. */
1052 COSTS_N_INSNS (2), /* strd. */
1053 COSTS_N_INSNS (2), /* stm_1st. */
1054 1, /* stm_regs_per_insn_1st. */
1055 2, /* stm_regs_per_insn_subsequent. */
1056 COSTS_N_INSNS (1), /* storef. */
1057 COSTS_N_INSNS (1), /* stored. */
1058 COSTS_N_INSNS (1), /* store_unaligned. */
1059 COSTS_N_INSNS (1), /* loadv. */
1060 COSTS_N_INSNS (1) /* storev. */
1061 },
1062 {
1063 /* FP SFmode */
1064 {
1065 COSTS_N_INSNS (14), /* div. */
1066 COSTS_N_INSNS (4), /* mult. */
1067 COSTS_N_INSNS (7), /* mult_addsub. */
1068 COSTS_N_INSNS (30), /* fma. */
1069 COSTS_N_INSNS (3), /* addsub. */
1070 COSTS_N_INSNS (1), /* fpconst. */
1071 COSTS_N_INSNS (1), /* neg. */
1072 COSTS_N_INSNS (3), /* compare. */
1073 COSTS_N_INSNS (3), /* widen. */
1074 COSTS_N_INSNS (3), /* narrow. */
1075 COSTS_N_INSNS (3), /* toint. */
1076 COSTS_N_INSNS (3), /* fromint. */
1077 COSTS_N_INSNS (3) /* roundint. */
1078 },
1079 /* FP DFmode */
1080 {
1081 COSTS_N_INSNS (24), /* div. */
1082 COSTS_N_INSNS (5), /* mult. */
1083 COSTS_N_INSNS (8), /* mult_addsub. */
1084 COSTS_N_INSNS (30), /* fma. */
1085 COSTS_N_INSNS (3), /* addsub. */
1086 COSTS_N_INSNS (1), /* fpconst. */
1087 COSTS_N_INSNS (1), /* neg. */
1088 COSTS_N_INSNS (3), /* compare. */
1089 COSTS_N_INSNS (3), /* widen. */
1090 COSTS_N_INSNS (3), /* narrow. */
1091 COSTS_N_INSNS (3), /* toint. */
1092 COSTS_N_INSNS (3), /* fromint. */
1093 COSTS_N_INSNS (3) /* roundint. */
1094 }
1095 },
1096 /* Vector */
1097 {
1098 COSTS_N_INSNS (1) /* alu. */
1099 }
1100 };
1101
1102 const struct cpu_cost_table cortexa8_extra_costs =
1103 {
1104 /* ALU */
1105 {
1106 0, /* arith. */
1107 0, /* logical. */
1108 COSTS_N_INSNS (1), /* shift. */
1109 0, /* shift_reg. */
1110 COSTS_N_INSNS (1), /* arith_shift. */
1111 0, /* arith_shift_reg. */
1112 COSTS_N_INSNS (1), /* log_shift. */
1113 0, /* log_shift_reg. */
1114 0, /* extend. */
1115 0, /* extend_arith. */
1116 0, /* bfi. */
1117 0, /* bfx. */
1118 0, /* clz. */
1119 0, /* rev. */
1120 0, /* non_exec. */
1121 true /* non_exec_costs_exec. */
1122 },
1123 {
1124 /* MULT SImode */
1125 {
1126 COSTS_N_INSNS (1), /* simple. */
1127 COSTS_N_INSNS (1), /* flag_setting. */
1128 COSTS_N_INSNS (1), /* extend. */
1129 COSTS_N_INSNS (1), /* add. */
1130 COSTS_N_INSNS (1), /* extend_add. */
1131 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1132 },
1133 /* MULT DImode */
1134 {
1135 0, /* simple (N/A). */
1136 0, /* flag_setting (N/A). */
1137 COSTS_N_INSNS (2), /* extend. */
1138 0, /* add (N/A). */
1139 COSTS_N_INSNS (2), /* extend_add. */
1140 0 /* idiv (N/A). */
1141 }
1142 },
1143 /* LD/ST */
1144 {
1145 COSTS_N_INSNS (1), /* load. */
1146 COSTS_N_INSNS (1), /* load_sign_extend. */
1147 COSTS_N_INSNS (1), /* ldrd. */
1148 COSTS_N_INSNS (1), /* ldm_1st. */
1149 1, /* ldm_regs_per_insn_1st. */
1150 2, /* ldm_regs_per_insn_subsequent. */
1151 COSTS_N_INSNS (1), /* loadf. */
1152 COSTS_N_INSNS (1), /* loadd. */
1153 COSTS_N_INSNS (1), /* load_unaligned. */
1154 COSTS_N_INSNS (1), /* store. */
1155 COSTS_N_INSNS (1), /* strd. */
1156 COSTS_N_INSNS (1), /* stm_1st. */
1157 1, /* stm_regs_per_insn_1st. */
1158 2, /* stm_regs_per_insn_subsequent. */
1159 COSTS_N_INSNS (1), /* storef. */
1160 COSTS_N_INSNS (1), /* stored. */
1161 COSTS_N_INSNS (1), /* store_unaligned. */
1162 COSTS_N_INSNS (1), /* loadv. */
1163 COSTS_N_INSNS (1) /* storev. */
1164 },
1165 {
1166 /* FP SFmode */
1167 {
1168 COSTS_N_INSNS (36), /* div. */
1169 COSTS_N_INSNS (11), /* mult. */
1170 COSTS_N_INSNS (20), /* mult_addsub. */
1171 COSTS_N_INSNS (30), /* fma. */
1172 COSTS_N_INSNS (9), /* addsub. */
1173 COSTS_N_INSNS (3), /* fpconst. */
1174 COSTS_N_INSNS (3), /* neg. */
1175 COSTS_N_INSNS (6), /* compare. */
1176 COSTS_N_INSNS (4), /* widen. */
1177 COSTS_N_INSNS (4), /* narrow. */
1178 COSTS_N_INSNS (8), /* toint. */
1179 COSTS_N_INSNS (8), /* fromint. */
1180 COSTS_N_INSNS (8) /* roundint. */
1181 },
1182 /* FP DFmode */
1183 {
1184 COSTS_N_INSNS (64), /* div. */
1185 COSTS_N_INSNS (16), /* mult. */
1186 COSTS_N_INSNS (25), /* mult_addsub. */
1187 COSTS_N_INSNS (30), /* fma. */
1188 COSTS_N_INSNS (9), /* addsub. */
1189 COSTS_N_INSNS (3), /* fpconst. */
1190 COSTS_N_INSNS (3), /* neg. */
1191 COSTS_N_INSNS (6), /* compare. */
1192 COSTS_N_INSNS (6), /* widen. */
1193 COSTS_N_INSNS (6), /* narrow. */
1194 COSTS_N_INSNS (8), /* toint. */
1195 COSTS_N_INSNS (8), /* fromint. */
1196 COSTS_N_INSNS (8) /* roundint. */
1197 }
1198 },
1199 /* Vector */
1200 {
1201 COSTS_N_INSNS (1) /* alu. */
1202 }
1203 };
1204
1205 const struct cpu_cost_table cortexa5_extra_costs =
1206 {
1207 /* ALU */
1208 {
1209 0, /* arith. */
1210 0, /* logical. */
1211 COSTS_N_INSNS (1), /* shift. */
1212 COSTS_N_INSNS (1), /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 COSTS_N_INSNS (1), /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 COSTS_N_INSNS (1), /* log_shift_reg. */
1217 COSTS_N_INSNS (1), /* extend. */
1218 COSTS_N_INSNS (1), /* extend_arith. */
1219 COSTS_N_INSNS (1), /* bfi. */
1220 COSTS_N_INSNS (1), /* bfx. */
1221 COSTS_N_INSNS (1), /* clz. */
1222 COSTS_N_INSNS (1), /* rev. */
1223 0, /* non_exec. */
1224 true /* non_exec_costs_exec. */
1225 },
1226
1227 {
1228 /* MULT SImode */
1229 {
1230 0, /* simple. */
1231 COSTS_N_INSNS (1), /* flag_setting. */
1232 COSTS_N_INSNS (1), /* extend. */
1233 COSTS_N_INSNS (1), /* add. */
1234 COSTS_N_INSNS (1), /* extend_add. */
1235 COSTS_N_INSNS (7) /* idiv. */
1236 },
1237 /* MULT DImode */
1238 {
1239 0, /* simple (N/A). */
1240 0, /* flag_setting (N/A). */
1241 COSTS_N_INSNS (1), /* extend. */
1242 0, /* add. */
1243 COSTS_N_INSNS (2), /* extend_add. */
1244 0 /* idiv (N/A). */
1245 }
1246 },
1247 /* LD/ST */
1248 {
1249 COSTS_N_INSNS (1), /* load. */
1250 COSTS_N_INSNS (1), /* load_sign_extend. */
1251 COSTS_N_INSNS (6), /* ldrd. */
1252 COSTS_N_INSNS (1), /* ldm_1st. */
1253 1, /* ldm_regs_per_insn_1st. */
1254 2, /* ldm_regs_per_insn_subsequent. */
1255 COSTS_N_INSNS (2), /* loadf. */
1256 COSTS_N_INSNS (4), /* loadd. */
1257 COSTS_N_INSNS (1), /* load_unaligned. */
1258 COSTS_N_INSNS (1), /* store. */
1259 COSTS_N_INSNS (3), /* strd. */
1260 COSTS_N_INSNS (1), /* stm_1st. */
1261 1, /* stm_regs_per_insn_1st. */
1262 2, /* stm_regs_per_insn_subsequent. */
1263 COSTS_N_INSNS (2), /* storef. */
1264 COSTS_N_INSNS (2), /* stored. */
1265 COSTS_N_INSNS (1), /* store_unaligned. */
1266 COSTS_N_INSNS (1), /* loadv. */
1267 COSTS_N_INSNS (1) /* storev. */
1268 },
1269 {
1270 /* FP SFmode */
1271 {
1272 COSTS_N_INSNS (15), /* div. */
1273 COSTS_N_INSNS (3), /* mult. */
1274 COSTS_N_INSNS (7), /* mult_addsub. */
1275 COSTS_N_INSNS (7), /* fma. */
1276 COSTS_N_INSNS (3), /* addsub. */
1277 COSTS_N_INSNS (3), /* fpconst. */
1278 COSTS_N_INSNS (3), /* neg. */
1279 COSTS_N_INSNS (3), /* compare. */
1280 COSTS_N_INSNS (3), /* widen. */
1281 COSTS_N_INSNS (3), /* narrow. */
1282 COSTS_N_INSNS (3), /* toint. */
1283 COSTS_N_INSNS (3), /* fromint. */
1284 COSTS_N_INSNS (3) /* roundint. */
1285 },
1286 /* FP DFmode */
1287 {
1288 COSTS_N_INSNS (30), /* div. */
1289 COSTS_N_INSNS (6), /* mult. */
1290 COSTS_N_INSNS (10), /* mult_addsub. */
1291 COSTS_N_INSNS (7), /* fma. */
1292 COSTS_N_INSNS (3), /* addsub. */
1293 COSTS_N_INSNS (3), /* fpconst. */
1294 COSTS_N_INSNS (3), /* neg. */
1295 COSTS_N_INSNS (3), /* compare. */
1296 COSTS_N_INSNS (3), /* widen. */
1297 COSTS_N_INSNS (3), /* narrow. */
1298 COSTS_N_INSNS (3), /* toint. */
1299 COSTS_N_INSNS (3), /* fromint. */
1300 COSTS_N_INSNS (3) /* roundint. */
1301 }
1302 },
1303 /* Vector */
1304 {
1305 COSTS_N_INSNS (1) /* alu. */
1306 }
1307 };
1308
1309
1310 const struct cpu_cost_table cortexa7_extra_costs =
1311 {
1312 /* ALU */
1313 {
1314 0, /* arith. */
1315 0, /* logical. */
1316 COSTS_N_INSNS (1), /* shift. */
1317 COSTS_N_INSNS (1), /* shift_reg. */
1318 COSTS_N_INSNS (1), /* arith_shift. */
1319 COSTS_N_INSNS (1), /* arith_shift_reg. */
1320 COSTS_N_INSNS (1), /* log_shift. */
1321 COSTS_N_INSNS (1), /* log_shift_reg. */
1322 COSTS_N_INSNS (1), /* extend. */
1323 COSTS_N_INSNS (1), /* extend_arith. */
1324 COSTS_N_INSNS (1), /* bfi. */
1325 COSTS_N_INSNS (1), /* bfx. */
1326 COSTS_N_INSNS (1), /* clz. */
1327 COSTS_N_INSNS (1), /* rev. */
1328 0, /* non_exec. */
1329 true /* non_exec_costs_exec. */
1330 },
1331
1332 {
1333 /* MULT SImode */
1334 {
1335 0, /* simple. */
1336 COSTS_N_INSNS (1), /* flag_setting. */
1337 COSTS_N_INSNS (1), /* extend. */
1338 COSTS_N_INSNS (1), /* add. */
1339 COSTS_N_INSNS (1), /* extend_add. */
1340 COSTS_N_INSNS (7) /* idiv. */
1341 },
1342 /* MULT DImode */
1343 {
1344 0, /* simple (N/A). */
1345 0, /* flag_setting (N/A). */
1346 COSTS_N_INSNS (1), /* extend. */
1347 0, /* add. */
1348 COSTS_N_INSNS (2), /* extend_add. */
1349 0 /* idiv (N/A). */
1350 }
1351 },
1352 /* LD/ST */
1353 {
1354 COSTS_N_INSNS (1), /* load. */
1355 COSTS_N_INSNS (1), /* load_sign_extend. */
1356 COSTS_N_INSNS (3), /* ldrd. */
1357 COSTS_N_INSNS (1), /* ldm_1st. */
1358 1, /* ldm_regs_per_insn_1st. */
1359 2, /* ldm_regs_per_insn_subsequent. */
1360 COSTS_N_INSNS (2), /* loadf. */
1361 COSTS_N_INSNS (2), /* loadd. */
1362 COSTS_N_INSNS (1), /* load_unaligned. */
1363 COSTS_N_INSNS (1), /* store. */
1364 COSTS_N_INSNS (3), /* strd. */
1365 COSTS_N_INSNS (1), /* stm_1st. */
1366 1, /* stm_regs_per_insn_1st. */
1367 2, /* stm_regs_per_insn_subsequent. */
1368 COSTS_N_INSNS (2), /* storef. */
1369 COSTS_N_INSNS (2), /* stored. */
1370 COSTS_N_INSNS (1), /* store_unaligned. */
1371 COSTS_N_INSNS (1), /* loadv. */
1372 COSTS_N_INSNS (1) /* storev. */
1373 },
1374 {
1375 /* FP SFmode */
1376 {
1377 COSTS_N_INSNS (15), /* div. */
1378 COSTS_N_INSNS (3), /* mult. */
1379 COSTS_N_INSNS (7), /* mult_addsub. */
1380 COSTS_N_INSNS (7), /* fma. */
1381 COSTS_N_INSNS (3), /* addsub. */
1382 COSTS_N_INSNS (3), /* fpconst. */
1383 COSTS_N_INSNS (3), /* neg. */
1384 COSTS_N_INSNS (3), /* compare. */
1385 COSTS_N_INSNS (3), /* widen. */
1386 COSTS_N_INSNS (3), /* narrow. */
1387 COSTS_N_INSNS (3), /* toint. */
1388 COSTS_N_INSNS (3), /* fromint. */
1389 COSTS_N_INSNS (3) /* roundint. */
1390 },
1391 /* FP DFmode */
1392 {
1393 COSTS_N_INSNS (30), /* div. */
1394 COSTS_N_INSNS (6), /* mult. */
1395 COSTS_N_INSNS (10), /* mult_addsub. */
1396 COSTS_N_INSNS (7), /* fma. */
1397 COSTS_N_INSNS (3), /* addsub. */
1398 COSTS_N_INSNS (3), /* fpconst. */
1399 COSTS_N_INSNS (3), /* neg. */
1400 COSTS_N_INSNS (3), /* compare. */
1401 COSTS_N_INSNS (3), /* widen. */
1402 COSTS_N_INSNS (3), /* narrow. */
1403 COSTS_N_INSNS (3), /* toint. */
1404 COSTS_N_INSNS (3), /* fromint. */
1405 COSTS_N_INSNS (3) /* roundint. */
1406 }
1407 },
1408 /* Vector */
1409 {
1410 COSTS_N_INSNS (1) /* alu. */
1411 }
1412 };
1413
1414 const struct cpu_cost_table cortexa12_extra_costs =
1415 {
1416 /* ALU */
1417 {
1418 0, /* arith. */
1419 0, /* logical. */
1420 0, /* shift. */
1421 COSTS_N_INSNS (1), /* shift_reg. */
1422 COSTS_N_INSNS (1), /* arith_shift. */
1423 COSTS_N_INSNS (1), /* arith_shift_reg. */
1424 COSTS_N_INSNS (1), /* log_shift. */
1425 COSTS_N_INSNS (1), /* log_shift_reg. */
1426 0, /* extend. */
1427 COSTS_N_INSNS (1), /* extend_arith. */
1428 0, /* bfi. */
1429 COSTS_N_INSNS (1), /* bfx. */
1430 COSTS_N_INSNS (1), /* clz. */
1431 COSTS_N_INSNS (1), /* rev. */
1432 0, /* non_exec. */
1433 true /* non_exec_costs_exec. */
1434 },
1435 /* MULT SImode */
1436 {
1437 {
1438 COSTS_N_INSNS (2), /* simple. */
1439 COSTS_N_INSNS (3), /* flag_setting. */
1440 COSTS_N_INSNS (2), /* extend. */
1441 COSTS_N_INSNS (3), /* add. */
1442 COSTS_N_INSNS (2), /* extend_add. */
1443 COSTS_N_INSNS (18) /* idiv. */
1444 },
1445 /* MULT DImode */
1446 {
1447 0, /* simple (N/A). */
1448 0, /* flag_setting (N/A). */
1449 COSTS_N_INSNS (3), /* extend. */
1450 0, /* add (N/A). */
1451 COSTS_N_INSNS (3), /* extend_add. */
1452 0 /* idiv (N/A). */
1453 }
1454 },
1455 /* LD/ST */
1456 {
1457 COSTS_N_INSNS (3), /* load. */
1458 COSTS_N_INSNS (3), /* load_sign_extend. */
1459 COSTS_N_INSNS (3), /* ldrd. */
1460 COSTS_N_INSNS (3), /* ldm_1st. */
1461 1, /* ldm_regs_per_insn_1st. */
1462 2, /* ldm_regs_per_insn_subsequent. */
1463 COSTS_N_INSNS (3), /* loadf. */
1464 COSTS_N_INSNS (3), /* loadd. */
1465 0, /* load_unaligned. */
1466 0, /* store. */
1467 0, /* strd. */
1468 0, /* stm_1st. */
1469 1, /* stm_regs_per_insn_1st. */
1470 2, /* stm_regs_per_insn_subsequent. */
1471 COSTS_N_INSNS (2), /* storef. */
1472 COSTS_N_INSNS (2), /* stored. */
1473 0, /* store_unaligned. */
1474 COSTS_N_INSNS (1), /* loadv. */
1475 COSTS_N_INSNS (1) /* storev. */
1476 },
1477 {
1478 /* FP SFmode */
1479 {
1480 COSTS_N_INSNS (17), /* div. */
1481 COSTS_N_INSNS (4), /* mult. */
1482 COSTS_N_INSNS (8), /* mult_addsub. */
1483 COSTS_N_INSNS (8), /* fma. */
1484 COSTS_N_INSNS (4), /* addsub. */
1485 COSTS_N_INSNS (2), /* fpconst. */
1486 COSTS_N_INSNS (2), /* neg. */
1487 COSTS_N_INSNS (2), /* compare. */
1488 COSTS_N_INSNS (4), /* widen. */
1489 COSTS_N_INSNS (4), /* narrow. */
1490 COSTS_N_INSNS (4), /* toint. */
1491 COSTS_N_INSNS (4), /* fromint. */
1492 COSTS_N_INSNS (4) /* roundint. */
1493 },
1494 /* FP DFmode */
1495 {
1496 COSTS_N_INSNS (31), /* div. */
1497 COSTS_N_INSNS (4), /* mult. */
1498 COSTS_N_INSNS (8), /* mult_addsub. */
1499 COSTS_N_INSNS (8), /* fma. */
1500 COSTS_N_INSNS (4), /* addsub. */
1501 COSTS_N_INSNS (2), /* fpconst. */
1502 COSTS_N_INSNS (2), /* neg. */
1503 COSTS_N_INSNS (2), /* compare. */
1504 COSTS_N_INSNS (4), /* widen. */
1505 COSTS_N_INSNS (4), /* narrow. */
1506 COSTS_N_INSNS (4), /* toint. */
1507 COSTS_N_INSNS (4), /* fromint. */
1508 COSTS_N_INSNS (4) /* roundint. */
1509 }
1510 },
1511 /* Vector */
1512 {
1513 COSTS_N_INSNS (1) /* alu. */
1514 }
1515 };
1516
1517 const struct cpu_cost_table cortexa15_extra_costs =
1518 {
1519 /* ALU */
1520 {
1521 0, /* arith. */
1522 0, /* logical. */
1523 0, /* shift. */
1524 0, /* shift_reg. */
1525 COSTS_N_INSNS (1), /* arith_shift. */
1526 COSTS_N_INSNS (1), /* arith_shift_reg. */
1527 COSTS_N_INSNS (1), /* log_shift. */
1528 COSTS_N_INSNS (1), /* log_shift_reg. */
1529 0, /* extend. */
1530 COSTS_N_INSNS (1), /* extend_arith. */
1531 COSTS_N_INSNS (1), /* bfi. */
1532 0, /* bfx. */
1533 0, /* clz. */
1534 0, /* rev. */
1535 0, /* non_exec. */
1536 true /* non_exec_costs_exec. */
1537 },
1538 /* MULT SImode */
1539 {
1540 {
1541 COSTS_N_INSNS (2), /* simple. */
1542 COSTS_N_INSNS (3), /* flag_setting. */
1543 COSTS_N_INSNS (2), /* extend. */
1544 COSTS_N_INSNS (2), /* add. */
1545 COSTS_N_INSNS (2), /* extend_add. */
1546 COSTS_N_INSNS (18) /* idiv. */
1547 },
1548 /* MULT DImode */
1549 {
1550 0, /* simple (N/A). */
1551 0, /* flag_setting (N/A). */
1552 COSTS_N_INSNS (3), /* extend. */
1553 0, /* add (N/A). */
1554 COSTS_N_INSNS (3), /* extend_add. */
1555 0 /* idiv (N/A). */
1556 }
1557 },
1558 /* LD/ST */
1559 {
1560 COSTS_N_INSNS (3), /* load. */
1561 COSTS_N_INSNS (3), /* load_sign_extend. */
1562 COSTS_N_INSNS (3), /* ldrd. */
1563 COSTS_N_INSNS (4), /* ldm_1st. */
1564 1, /* ldm_regs_per_insn_1st. */
1565 2, /* ldm_regs_per_insn_subsequent. */
1566 COSTS_N_INSNS (4), /* loadf. */
1567 COSTS_N_INSNS (4), /* loadd. */
1568 0, /* load_unaligned. */
1569 0, /* store. */
1570 0, /* strd. */
1571 COSTS_N_INSNS (1), /* stm_1st. */
1572 1, /* stm_regs_per_insn_1st. */
1573 2, /* stm_regs_per_insn_subsequent. */
1574 0, /* storef. */
1575 0, /* stored. */
1576 0, /* store_unaligned. */
1577 COSTS_N_INSNS (1), /* loadv. */
1578 COSTS_N_INSNS (1) /* storev. */
1579 },
1580 {
1581 /* FP SFmode */
1582 {
1583 COSTS_N_INSNS (17), /* div. */
1584 COSTS_N_INSNS (4), /* mult. */
1585 COSTS_N_INSNS (8), /* mult_addsub. */
1586 COSTS_N_INSNS (8), /* fma. */
1587 COSTS_N_INSNS (4), /* addsub. */
1588 COSTS_N_INSNS (2), /* fpconst. */
1589 COSTS_N_INSNS (2), /* neg. */
1590 COSTS_N_INSNS (5), /* compare. */
1591 COSTS_N_INSNS (4), /* widen. */
1592 COSTS_N_INSNS (4), /* narrow. */
1593 COSTS_N_INSNS (4), /* toint. */
1594 COSTS_N_INSNS (4), /* fromint. */
1595 COSTS_N_INSNS (4) /* roundint. */
1596 },
1597 /* FP DFmode */
1598 {
1599 COSTS_N_INSNS (31), /* div. */
1600 COSTS_N_INSNS (4), /* mult. */
1601 COSTS_N_INSNS (8), /* mult_addsub. */
1602 COSTS_N_INSNS (8), /* fma. */
1603 COSTS_N_INSNS (4), /* addsub. */
1604 COSTS_N_INSNS (2), /* fpconst. */
1605 COSTS_N_INSNS (2), /* neg. */
1606 COSTS_N_INSNS (2), /* compare. */
1607 COSTS_N_INSNS (4), /* widen. */
1608 COSTS_N_INSNS (4), /* narrow. */
1609 COSTS_N_INSNS (4), /* toint. */
1610 COSTS_N_INSNS (4), /* fromint. */
1611 COSTS_N_INSNS (4) /* roundint. */
1612 }
1613 },
1614 /* Vector */
1615 {
1616 COSTS_N_INSNS (1) /* alu. */
1617 }
1618 };
1619
1620 const struct cpu_cost_table v7m_extra_costs =
1621 {
1622 /* ALU */
1623 {
1624 0, /* arith. */
1625 0, /* logical. */
1626 0, /* shift. */
1627 0, /* shift_reg. */
1628 0, /* arith_shift. */
1629 COSTS_N_INSNS (1), /* arith_shift_reg. */
1630 0, /* log_shift. */
1631 COSTS_N_INSNS (1), /* log_shift_reg. */
1632 0, /* extend. */
1633 COSTS_N_INSNS (1), /* extend_arith. */
1634 0, /* bfi. */
1635 0, /* bfx. */
1636 0, /* clz. */
1637 0, /* rev. */
1638 COSTS_N_INSNS (1), /* non_exec. */
1639 false /* non_exec_costs_exec. */
1640 },
1641 {
1642 /* MULT SImode */
1643 {
1644 COSTS_N_INSNS (1), /* simple. */
1645 COSTS_N_INSNS (1), /* flag_setting. */
1646 COSTS_N_INSNS (2), /* extend. */
1647 COSTS_N_INSNS (1), /* add. */
1648 COSTS_N_INSNS (3), /* extend_add. */
1649 COSTS_N_INSNS (8) /* idiv. */
1650 },
1651 /* MULT DImode */
1652 {
1653 0, /* simple (N/A). */
1654 0, /* flag_setting (N/A). */
1655 COSTS_N_INSNS (2), /* extend. */
1656 0, /* add (N/A). */
1657 COSTS_N_INSNS (3), /* extend_add. */
1658 0 /* idiv (N/A). */
1659 }
1660 },
1661 /* LD/ST */
1662 {
1663 COSTS_N_INSNS (2), /* load. */
1664 0, /* load_sign_extend. */
1665 COSTS_N_INSNS (3), /* ldrd. */
1666 COSTS_N_INSNS (2), /* ldm_1st. */
1667 1, /* ldm_regs_per_insn_1st. */
1668 1, /* ldm_regs_per_insn_subsequent. */
1669 COSTS_N_INSNS (2), /* loadf. */
1670 COSTS_N_INSNS (3), /* loadd. */
1671 COSTS_N_INSNS (1), /* load_unaligned. */
1672 COSTS_N_INSNS (2), /* store. */
1673 COSTS_N_INSNS (3), /* strd. */
1674 COSTS_N_INSNS (2), /* stm_1st. */
1675 1, /* stm_regs_per_insn_1st. */
1676 1, /* stm_regs_per_insn_subsequent. */
1677 COSTS_N_INSNS (2), /* storef. */
1678 COSTS_N_INSNS (3), /* stored. */
1679 COSTS_N_INSNS (1), /* store_unaligned. */
1680 COSTS_N_INSNS (1), /* loadv. */
1681 COSTS_N_INSNS (1) /* storev. */
1682 },
1683 {
1684 /* FP SFmode */
1685 {
1686 COSTS_N_INSNS (7), /* div. */
1687 COSTS_N_INSNS (2), /* mult. */
1688 COSTS_N_INSNS (5), /* mult_addsub. */
1689 COSTS_N_INSNS (3), /* fma. */
1690 COSTS_N_INSNS (1), /* addsub. */
1691 0, /* fpconst. */
1692 0, /* neg. */
1693 0, /* compare. */
1694 0, /* widen. */
1695 0, /* narrow. */
1696 0, /* toint. */
1697 0, /* fromint. */
1698 0 /* roundint. */
1699 },
1700 /* FP DFmode */
1701 {
1702 COSTS_N_INSNS (15), /* div. */
1703 COSTS_N_INSNS (5), /* mult. */
1704 COSTS_N_INSNS (7), /* mult_addsub. */
1705 COSTS_N_INSNS (7), /* fma. */
1706 COSTS_N_INSNS (3), /* addsub. */
1707 0, /* fpconst. */
1708 0, /* neg. */
1709 0, /* compare. */
1710 0, /* widen. */
1711 0, /* narrow. */
1712 0, /* toint. */
1713 0, /* fromint. */
1714 0 /* roundint. */
1715 }
1716 },
1717 /* Vector */
1718 {
1719 COSTS_N_INSNS (1) /* alu. */
1720 }
1721 };
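/* A short note on how to read these tables (a sketch, not normative): each
   per-CPU table gives the cost of an operation over and above a
   single-instruction baseline, which is why the cheapest operations are
   listed as 0.  COSTS_N_INSNS (n) (defined in rtl.h as (n) * 4) expresses
   "roughly n instructions' worth" in GCC's cost units, so, for example, the
   COSTS_N_INSNS (15) DFmode divide entry in v7m_extra_costs above models a
   double-precision divide as about 15 extra instructions of cost.  */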
1722
1723 const struct tune_params arm_slowmul_tune =
1724 {
1725 &generic_extra_costs, /* Insn extra costs. */
1726 NULL, /* Sched adj cost. */
1727 arm_default_branch_cost,
1728 &arm_default_vec_cost,
1729 3, /* Constant limit. */
1730 5, /* Max cond insns. */
1731 8, /* Memset max inline. */
1732 1, /* Issue rate. */
1733 ARM_PREFETCH_NOT_BENEFICIAL,
1734 tune_params::PREF_CONST_POOL_TRUE,
1735 tune_params::PREF_LDRD_FALSE,
1736 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1737 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1738 tune_params::DISPARAGE_FLAGS_NEITHER,
1739 tune_params::PREF_NEON_64_FALSE,
1740 tune_params::PREF_NEON_STRINGOPS_FALSE,
1741 tune_params::FUSE_NOTHING,
1742 tune_params::SCHED_AUTOPREF_OFF
1743 };
1744
1745 const struct tune_params arm_fastmul_tune =
1746 {
1747 &generic_extra_costs, /* Insn extra costs. */
1748 NULL, /* Sched adj cost. */
1749 arm_default_branch_cost,
1750 &arm_default_vec_cost,
1751 1, /* Constant limit. */
1752 5, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL,
1756 tune_params::PREF_CONST_POOL_TRUE,
1757 tune_params::PREF_LDRD_FALSE,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER,
1761 tune_params::PREF_NEON_64_FALSE,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE,
1763 tune_params::FUSE_NOTHING,
1764 tune_params::SCHED_AUTOPREF_OFF
1765 };
1766
1767 /* StrongARM has early execution of branches, so a sequence that is worth
1768 skipping is shorter. Set max_insns_skipped to a lower value. */
1769
1770 const struct tune_params arm_strongarm_tune =
1771 {
1772 &generic_extra_costs, /* Insn extra costs. */
1773 NULL, /* Sched adj cost. */
1774 arm_default_branch_cost,
1775 &arm_default_vec_cost,
1776 1, /* Constant limit. */
1777 3, /* Max cond insns. */
1778 8, /* Memset max inline. */
1779 1, /* Issue rate. */
1780 ARM_PREFETCH_NOT_BENEFICIAL,
1781 tune_params::PREF_CONST_POOL_TRUE,
1782 tune_params::PREF_LDRD_FALSE,
1783 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1784 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1785 tune_params::DISPARAGE_FLAGS_NEITHER,
1786 tune_params::PREF_NEON_64_FALSE,
1787 tune_params::PREF_NEON_STRINGOPS_FALSE,
1788 tune_params::FUSE_NOTHING,
1789 tune_params::SCHED_AUTOPREF_OFF
1790 };
1791
1792 const struct tune_params arm_xscale_tune =
1793 {
1794 &generic_extra_costs, /* Insn extra costs. */
1795 xscale_sched_adjust_cost,
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 2, /* Constant limit. */
1799 3, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_9e_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 NULL, /* Sched adj cost. */
1818 arm_default_branch_cost,
1819 &arm_default_vec_cost,
1820 1, /* Constant limit. */
1821 5, /* Max cond insns. */
1822 8, /* Memset max inline. */
1823 1, /* Issue rate. */
1824 ARM_PREFETCH_NOT_BENEFICIAL,
1825 tune_params::PREF_CONST_POOL_TRUE,
1826 tune_params::PREF_LDRD_FALSE,
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1829 tune_params::DISPARAGE_FLAGS_NEITHER,
1830 tune_params::PREF_NEON_64_FALSE,
1831 tune_params::PREF_NEON_STRINGOPS_FALSE,
1832 tune_params::FUSE_NOTHING,
1833 tune_params::SCHED_AUTOPREF_OFF
1834 };
1835
1836 const struct tune_params arm_marvell_pj4_tune =
1837 {
1838 &generic_extra_costs, /* Insn extra costs. */
1839 NULL, /* Sched adj cost. */
1840 arm_default_branch_cost,
1841 &arm_default_vec_cost,
1842 1, /* Constant limit. */
1843 5, /* Max cond insns. */
1844 8, /* Memset max inline. */
1845 2, /* Issue rate. */
1846 ARM_PREFETCH_NOT_BENEFICIAL,
1847 tune_params::PREF_CONST_POOL_TRUE,
1848 tune_params::PREF_LDRD_FALSE,
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1850 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1851 tune_params::DISPARAGE_FLAGS_NEITHER,
1852 tune_params::PREF_NEON_64_FALSE,
1853 tune_params::PREF_NEON_STRINGOPS_FALSE,
1854 tune_params::FUSE_NOTHING,
1855 tune_params::SCHED_AUTOPREF_OFF
1856 };
1857
1858 const struct tune_params arm_v6t2_tune =
1859 {
1860 &generic_extra_costs, /* Insn extra costs. */
1861 NULL, /* Sched adj cost. */
1862 arm_default_branch_cost,
1863 &arm_default_vec_cost,
1864 1, /* Constant limit. */
1865 5, /* Max cond insns. */
1866 8, /* Memset max inline. */
1867 1, /* Issue rate. */
1868 ARM_PREFETCH_NOT_BENEFICIAL,
1869 tune_params::PREF_CONST_POOL_FALSE,
1870 tune_params::PREF_LDRD_FALSE,
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1872 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1873 tune_params::DISPARAGE_FLAGS_NEITHER,
1874 tune_params::PREF_NEON_64_FALSE,
1875 tune_params::PREF_NEON_STRINGOPS_FALSE,
1876 tune_params::FUSE_NOTHING,
1877 tune_params::SCHED_AUTOPREF_OFF
1878 };
1879
1880
1881 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1882 const struct tune_params arm_cortex_tune =
1883 {
1884 &generic_extra_costs,
1885 NULL, /* Sched adj cost. */
1886 arm_default_branch_cost,
1887 &arm_default_vec_cost,
1888 1, /* Constant limit. */
1889 5, /* Max cond insns. */
1890 8, /* Memset max inline. */
1891 2, /* Issue rate. */
1892 ARM_PREFETCH_NOT_BENEFICIAL,
1893 tune_params::PREF_CONST_POOL_FALSE,
1894 tune_params::PREF_LDRD_FALSE,
1895 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1896 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1897 tune_params::DISPARAGE_FLAGS_NEITHER,
1898 tune_params::PREF_NEON_64_FALSE,
1899 tune_params::PREF_NEON_STRINGOPS_FALSE,
1900 tune_params::FUSE_NOTHING,
1901 tune_params::SCHED_AUTOPREF_OFF
1902 };
1903
1904 const struct tune_params arm_cortex_a8_tune =
1905 {
1906 &cortexa8_extra_costs,
1907 NULL, /* Sched adj cost. */
1908 arm_default_branch_cost,
1909 &arm_default_vec_cost,
1910 1, /* Constant limit. */
1911 5, /* Max cond insns. */
1912 8, /* Memset max inline. */
1913 2, /* Issue rate. */
1914 ARM_PREFETCH_NOT_BENEFICIAL,
1915 tune_params::PREF_CONST_POOL_FALSE,
1916 tune_params::PREF_LDRD_FALSE,
1917 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1918 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1919 tune_params::DISPARAGE_FLAGS_NEITHER,
1920 tune_params::PREF_NEON_64_FALSE,
1921 tune_params::PREF_NEON_STRINGOPS_TRUE,
1922 tune_params::FUSE_NOTHING,
1923 tune_params::SCHED_AUTOPREF_OFF
1924 };
1925
1926 const struct tune_params arm_cortex_a7_tune =
1927 {
1928 &cortexa7_extra_costs,
1929 NULL, /* Sched adj cost. */
1930 arm_default_branch_cost,
1931 &arm_default_vec_cost,
1932 1, /* Constant limit. */
1933 5, /* Max cond insns. */
1934 8, /* Memset max inline. */
1935 2, /* Issue rate. */
1936 ARM_PREFETCH_NOT_BENEFICIAL,
1937 tune_params::PREF_CONST_POOL_FALSE,
1938 tune_params::PREF_LDRD_FALSE,
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1940 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1941 tune_params::DISPARAGE_FLAGS_NEITHER,
1942 tune_params::PREF_NEON_64_FALSE,
1943 tune_params::PREF_NEON_STRINGOPS_TRUE,
1944 tune_params::FUSE_NOTHING,
1945 tune_params::SCHED_AUTOPREF_OFF
1946 };
1947
1948 const struct tune_params arm_cortex_a15_tune =
1949 {
1950 &cortexa15_extra_costs,
1951 NULL, /* Sched adj cost. */
1952 arm_default_branch_cost,
1953 &arm_default_vec_cost,
1954 1, /* Constant limit. */
1955 2, /* Max cond insns. */
1956 8, /* Memset max inline. */
1957 3, /* Issue rate. */
1958 ARM_PREFETCH_NOT_BENEFICIAL,
1959 tune_params::PREF_CONST_POOL_FALSE,
1960 tune_params::PREF_LDRD_TRUE,
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1962 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1963 tune_params::DISPARAGE_FLAGS_ALL,
1964 tune_params::PREF_NEON_64_FALSE,
1965 tune_params::PREF_NEON_STRINGOPS_TRUE,
1966 tune_params::FUSE_NOTHING,
1967 tune_params::SCHED_AUTOPREF_FULL
1968 };
1969
1970 const struct tune_params arm_cortex_a35_tune =
1971 {
1972 &cortexa53_extra_costs,
1973 NULL, /* Sched adj cost. */
1974 arm_default_branch_cost,
1975 &arm_default_vec_cost,
1976 1, /* Constant limit. */
1977 5, /* Max cond insns. */
1978 8, /* Memset max inline. */
1979 1, /* Issue rate. */
1980 ARM_PREFETCH_NOT_BENEFICIAL,
1981 tune_params::PREF_CONST_POOL_FALSE,
1982 tune_params::PREF_LDRD_FALSE,
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1984 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1985 tune_params::DISPARAGE_FLAGS_NEITHER,
1986 tune_params::PREF_NEON_64_FALSE,
1987 tune_params::PREF_NEON_STRINGOPS_TRUE,
1988 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1989 tune_params::SCHED_AUTOPREF_OFF
1990 };
1991
1992 const struct tune_params arm_cortex_a53_tune =
1993 {
1994 &cortexa53_extra_costs,
1995 NULL, /* Sched adj cost. */
1996 arm_default_branch_cost,
1997 &arm_default_vec_cost,
1998 1, /* Constant limit. */
1999 5, /* Max cond insns. */
2000 8, /* Memset max inline. */
2001 2, /* Issue rate. */
2002 ARM_PREFETCH_NOT_BENEFICIAL,
2003 tune_params::PREF_CONST_POOL_FALSE,
2004 tune_params::PREF_LDRD_FALSE,
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2006 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2007 tune_params::DISPARAGE_FLAGS_NEITHER,
2008 tune_params::PREF_NEON_64_FALSE,
2009 tune_params::PREF_NEON_STRINGOPS_TRUE,
2010 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2011 tune_params::SCHED_AUTOPREF_OFF
2012 };
2013
2014 const struct tune_params arm_cortex_a57_tune =
2015 {
2016 &cortexa57_extra_costs,
2017 NULL, /* Sched adj cost. */
2018 arm_default_branch_cost,
2019 &arm_default_vec_cost,
2020 1, /* Constant limit. */
2021 2, /* Max cond insns. */
2022 8, /* Memset max inline. */
2023 3, /* Issue rate. */
2024 ARM_PREFETCH_NOT_BENEFICIAL,
2025 tune_params::PREF_CONST_POOL_FALSE,
2026 tune_params::PREF_LDRD_TRUE,
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2028 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2029 tune_params::DISPARAGE_FLAGS_ALL,
2030 tune_params::PREF_NEON_64_FALSE,
2031 tune_params::PREF_NEON_STRINGOPS_TRUE,
2032 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2033 tune_params::SCHED_AUTOPREF_FULL
2034 };
2035
2036 const struct tune_params arm_exynosm1_tune =
2037 {
2038 &exynosm1_extra_costs,
2039 NULL, /* Sched adj cost. */
2040 arm_default_branch_cost,
2041 &arm_default_vec_cost,
2042 1, /* Constant limit. */
2043 2, /* Max cond insns. */
2044 8, /* Memset max inline. */
2045 3, /* Issue rate. */
2046 ARM_PREFETCH_NOT_BENEFICIAL,
2047 tune_params::PREF_CONST_POOL_FALSE,
2048 tune_params::PREF_LDRD_TRUE,
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2050 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2051 tune_params::DISPARAGE_FLAGS_ALL,
2052 tune_params::PREF_NEON_64_FALSE,
2053 tune_params::PREF_NEON_STRINGOPS_TRUE,
2054 tune_params::FUSE_NOTHING,
2055 tune_params::SCHED_AUTOPREF_OFF
2056 };
2057
2058 const struct tune_params arm_xgene1_tune =
2059 {
2060 &xgene1_extra_costs,
2061 NULL, /* Sched adj cost. */
2062 arm_default_branch_cost,
2063 &arm_default_vec_cost,
2064 1, /* Constant limit. */
2065 2, /* Max cond insns. */
2066 32, /* Memset max inline. */
2067 4, /* Issue rate. */
2068 ARM_PREFETCH_NOT_BENEFICIAL,
2069 tune_params::PREF_CONST_POOL_FALSE,
2070 tune_params::PREF_LDRD_TRUE,
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2072 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2073 tune_params::DISPARAGE_FLAGS_ALL,
2074 tune_params::PREF_NEON_64_FALSE,
2075 tune_params::PREF_NEON_STRINGOPS_FALSE,
2076 tune_params::FUSE_NOTHING,
2077 tune_params::SCHED_AUTOPREF_OFF
2078 };
2079
2080 const struct tune_params arm_qdf24xx_tune =
2081 {
2082 &qdf24xx_extra_costs,
2083 NULL, /* Scheduler cost adjustment. */
2084 arm_default_branch_cost,
2085 &arm_default_vec_cost, /* Vectorizer costs. */
2086 1, /* Constant limit. */
2087 2, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 4, /* Issue rate. */
2090 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2091 tune_params::PREF_CONST_POOL_FALSE,
2092 tune_params::PREF_LDRD_TRUE,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_ALL,
2096 tune_params::PREF_NEON_64_FALSE,
2097 tune_params::PREF_NEON_STRINGOPS_TRUE,
2098 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2099 tune_params::SCHED_AUTOPREF_FULL
2100 };
2101
2102 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2103 less appealing. Set max_insns_skipped to a low value. */
2104
2105 const struct tune_params arm_cortex_a5_tune =
2106 {
2107 &cortexa5_extra_costs,
2108 NULL, /* Sched adj cost. */
2109 arm_cortex_a5_branch_cost,
2110 &arm_default_vec_cost,
2111 1, /* Constant limit. */
2112 1, /* Max cond insns. */
2113 8, /* Memset max inline. */
2114 2, /* Issue rate. */
2115 ARM_PREFETCH_NOT_BENEFICIAL,
2116 tune_params::PREF_CONST_POOL_FALSE,
2117 tune_params::PREF_LDRD_FALSE,
2118 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2120 tune_params::DISPARAGE_FLAGS_NEITHER,
2121 tune_params::PREF_NEON_64_FALSE,
2122 tune_params::PREF_NEON_STRINGOPS_TRUE,
2123 tune_params::FUSE_NOTHING,
2124 tune_params::SCHED_AUTOPREF_OFF
2125 };
2126
2127 const struct tune_params arm_cortex_a9_tune =
2128 {
2129 &cortexa9_extra_costs,
2130 cortex_a9_sched_adjust_cost,
2131 arm_default_branch_cost,
2132 &arm_default_vec_cost,
2133 1, /* Constant limit. */
2134 5, /* Max cond insns. */
2135 8, /* Memset max inline. */
2136 2, /* Issue rate. */
2137 ARM_PREFETCH_BENEFICIAL(4,32,32),
2138 tune_params::PREF_CONST_POOL_FALSE,
2139 tune_params::PREF_LDRD_FALSE,
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2142 tune_params::DISPARAGE_FLAGS_NEITHER,
2143 tune_params::PREF_NEON_64_FALSE,
2144 tune_params::PREF_NEON_STRINGOPS_FALSE,
2145 tune_params::FUSE_NOTHING,
2146 tune_params::SCHED_AUTOPREF_OFF
2147 };
2148
2149 const struct tune_params arm_cortex_a12_tune =
2150 {
2151 &cortexa12_extra_costs,
2152 NULL, /* Sched adj cost. */
2153 arm_default_branch_cost,
2154 &arm_default_vec_cost, /* Vectorizer costs. */
2155 1, /* Constant limit. */
2156 2, /* Max cond insns. */
2157 8, /* Memset max inline. */
2158 2, /* Issue rate. */
2159 ARM_PREFETCH_NOT_BENEFICIAL,
2160 tune_params::PREF_CONST_POOL_FALSE,
2161 tune_params::PREF_LDRD_TRUE,
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2163 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2164 tune_params::DISPARAGE_FLAGS_ALL,
2165 tune_params::PREF_NEON_64_FALSE,
2166 tune_params::PREF_NEON_STRINGOPS_TRUE,
2167 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2168 tune_params::SCHED_AUTOPREF_OFF
2169 };
2170
2171 const struct tune_params arm_cortex_a73_tune =
2172 {
2173 &cortexa57_extra_costs,
2174 NULL, /* Sched adj cost. */
2175 arm_default_branch_cost,
2176 &arm_default_vec_cost, /* Vectorizer costs. */
2177 1, /* Constant limit. */
2178 2, /* Max cond insns. */
2179 8, /* Memset max inline. */
2180 2, /* Issue rate. */
2181 ARM_PREFETCH_NOT_BENEFICIAL,
2182 tune_params::PREF_CONST_POOL_FALSE,
2183 tune_params::PREF_LDRD_TRUE,
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2185 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2186 tune_params::DISPARAGE_FLAGS_ALL,
2187 tune_params::PREF_NEON_64_FALSE,
2188 tune_params::PREF_NEON_STRINGOPS_TRUE,
2189 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2190 tune_params::SCHED_AUTOPREF_FULL
2191 };
2192
2193 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
2194 single cycle to execute, so two cycles for the pair. An LDR from the constant
2195 pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
2196 loads/stores can be pipelined together, saving one cycle), and may also
2197 improve icache utilisation. Hence we prefer the constant pool for such
2198 processors. */
2199
2200 const struct tune_params arm_v7m_tune =
2201 {
2202 &v7m_extra_costs,
2203 NULL, /* Sched adj cost. */
2204 arm_cortex_m_branch_cost,
2205 &arm_default_vec_cost,
2206 1, /* Constant limit. */
2207 2, /* Max cond insns. */
2208 8, /* Memset max inline. */
2209 1, /* Issue rate. */
2210 ARM_PREFETCH_NOT_BENEFICIAL,
2211 tune_params::PREF_CONST_POOL_TRUE,
2212 tune_params::PREF_LDRD_FALSE,
2213 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2214 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2215 tune_params::DISPARAGE_FLAGS_NEITHER,
2216 tune_params::PREF_NEON_64_FALSE,
2217 tune_params::PREF_NEON_STRINGOPS_FALSE,
2218 tune_params::FUSE_NOTHING,
2219 tune_params::SCHED_AUTOPREF_OFF
2220 };
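/* A rough sketch of the two alternatives weighed up in the comment above
   arm_v7m_tune (illustrative assembly only; exact syntax and timings depend
   on the core):

     movw  r0, #:lower16:constant   @ 1 cycle
     movt  r0, #:upper16:constant   @ 1 cycle

   versus a literal-pool load

     ldr   r0, .Lpool_entry         @ 2 cycles, but can pipeline with
                                    @ neighbouring loads/stores

   which is why arm_v7m_tune above keeps PREF_CONST_POOL_TRUE.  */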
2221
2222 /* Cortex-M7 tuning. */
2223
2224 const struct tune_params arm_cortex_m7_tune =
2225 {
2226 &v7m_extra_costs,
2227 NULL, /* Sched adj cost. */
2228 arm_cortex_m7_branch_cost,
2229 &arm_default_vec_cost,
2230 0, /* Constant limit. */
2231 1, /* Max cond insns. */
2232 8, /* Memset max inline. */
2233 2, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_TRUE,
2236 tune_params::PREF_LDRD_FALSE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_NEITHER,
2240 tune_params::PREF_NEON_64_FALSE,
2241 tune_params::PREF_NEON_STRINGOPS_FALSE,
2242 tune_params::FUSE_NOTHING,
2243 tune_params::SCHED_AUTOPREF_OFF
2244 };
2245
2246 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2247 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2248 cortex-m23. */
2249 const struct tune_params arm_v6m_tune =
2250 {
2251 &generic_extra_costs, /* Insn extra costs. */
2252 NULL, /* Sched adj cost. */
2253 arm_default_branch_cost,
2254 &arm_default_vec_cost, /* Vectorizer costs. */
2255 1, /* Constant limit. */
2256 5, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 1, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL,
2260 tune_params::PREF_CONST_POOL_FALSE,
2261 tune_params::PREF_LDRD_FALSE,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER,
2265 tune_params::PREF_NEON_64_FALSE,
2266 tune_params::PREF_NEON_STRINGOPS_FALSE,
2267 tune_params::FUSE_NOTHING,
2268 tune_params::SCHED_AUTOPREF_OFF
2269 };
2270
2271 const struct tune_params arm_fa726te_tune =
2272 {
2273 &generic_extra_costs, /* Insn extra costs. */
2274 fa726te_sched_adjust_cost,
2275 arm_default_branch_cost,
2276 &arm_default_vec_cost,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_NOT_BENEFICIAL,
2282 tune_params::PREF_CONST_POOL_TRUE,
2283 tune_params::PREF_LDRD_FALSE,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER,
2287 tune_params::PREF_NEON_64_FALSE,
2288 tune_params::PREF_NEON_STRINGOPS_FALSE,
2289 tune_params::FUSE_NOTHING,
2290 tune_params::SCHED_AUTOPREF_OFF
2291 };
2292
2293
2294 /* Not all of these give usefully different compilation alternatives,
2295 but there is no simple way of generalizing them. */
2296 static const struct processors all_cores[] =
2297 {
2298 /* ARM Cores */
2299 #define ARM_CORE(NAME, X, IDENT, TUNE_FLAGS, ARCH, ISA, COSTS) \
2300 {NAME, TARGET_CPU_##IDENT, TUNE_FLAGS, #ARCH, BASE_ARCH_##ARCH, \
2301 {ISA isa_nobit}, &arm_##COSTS##_tune},
2302 #include "arm-cores.def"
2303 #undef ARM_CORE
2304 {NULL, TARGET_CPU_arm_none, 0, NULL, BASE_ARCH_0, {isa_nobit}, NULL}
2305 };
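/* Illustrative expansion (hypothetical arguments, not copied from
   arm-cores.def): an entry such as

     ARM_CORE ("cortex-a8", cortexa8, cortexa8, FLAGS, 7A, ISA_BITS, cortex_a8)

   would expand to roughly

     {"cortex-a8", TARGET_CPU_cortexa8, FLAGS, "7A", BASE_ARCH_7A,
      {ISA_BITS isa_nobit}, &arm_cortex_a8_tune},

   i.e. the ISA list is closed with isa_nobit and the final COSTS argument
   selects one of the arm_*_tune structures defined earlier in this file.  */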
2306
2307 static const struct processors all_architectures[] =
2308 {
2309 /* ARM Architectures */
2310 /* We don't specify tuning costs here as it will be figured out
2311 from the core. */
2312
2313 #define ARM_ARCH(NAME, CORE, TUNE_FLAGS, ARCH, ISA) \
2314 {NAME, TARGET_CPU_##CORE, TUNE_FLAGS, #ARCH, BASE_ARCH_##ARCH, \
2315 {ISA isa_nobit}, NULL},
2316 #include "arm-arches.def"
2317 #undef ARM_ARCH
2318 {NULL, TARGET_CPU_arm_none, 0, NULL, BASE_ARCH_0, {isa_nobit}, NULL}
2319 };
2320
2321 /* The name of the preprocessor macro to define for this architecture. PROFILE
2322 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2323 is thus chosen to be big enough to hold the longest architecture name. */
2324
2325 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2326
2327 /* Available values for -mfpu=. */
2328
2329 const struct arm_fpu_desc all_fpus[] =
2330 {
2331 #undef ARM_FPU
2332 #define ARM_FPU(NAME, CNAME, ISA) \
2333 { NAME, {ISA isa_nobit} },
2334 #include "arm-fpus.def"
2335 #undef ARM_FPU
2336 };
2337
2338 /* Supported TLS relocations. */
2339
2340 enum tls_reloc {
2341 TLS_GD32,
2342 TLS_LDM32,
2343 TLS_LDO32,
2344 TLS_IE32,
2345 TLS_LE32,
2346 TLS_DESCSEQ /* GNU scheme */
2347 };
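/* Broadly, these correspond to the standard TLS access models: TLS_GD32 is
   the general-dynamic model, TLS_LDM32/TLS_LDO32 are the module and offset
   parts of the local-dynamic model, TLS_IE32 is initial-exec, TLS_LE32 is
   local-exec, and TLS_DESCSEQ is the GNU TLS-descriptor sequence.  */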
2348
2349 /* The maximum number of insns to be used when loading a constant. */
2350 inline static int
2351 arm_constant_limit (bool size_p)
2352 {
2353 return size_p ? 1 : current_tune->constant_limit;
2354 }
2355
2356 /* Emit an insn that's a simple single-set. Both the operands must be known
2357 to be valid. */
2358 inline static rtx_insn *
2359 emit_set_insn (rtx x, rtx y)
2360 {
2361 return emit_insn (gen_rtx_SET (x, y));
2362 }
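/* For example, emit_set_insn (dest_reg, GEN_INT (42)) emits the single
   pattern (set (reg ...) (const_int 42)); dest_reg is just an illustrative
   name here, and the caller must already have checked that both operands
   are legitimate for the target.  */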
2363
2364 /* Return the number of bits set in VALUE. */
2365 static unsigned
2366 bit_count (unsigned long value)
2367 {
2368 unsigned long count = 0;
2369
2370 while (value)
2371 {
2372 count++;
2373 value &= value - 1; /* Clear the least-significant set bit. */
2374 }
2375
2376 return count;
2377 }
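/* Worked example: for value == 0x2c (binary 101100) the loop sees
   0x2c -> 0x28 -> 0x20 -> 0, clearing one set bit per iteration, and
   returns 3.  */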
2378
2379 /* Return the number of bits set in BMAP. */
2380 static unsigned
2381 bitmap_popcount (const sbitmap bmap)
2382 {
2383 unsigned int count = 0;
2384 unsigned int n = 0;
2385 sbitmap_iterator sbi;
2386
2387 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2388 count++;
2389 return count;
2390 }
2391
2392 typedef struct
2393 {
2394 machine_mode mode;
2395 const char *name;
2396 } arm_fixed_mode_set;
2397
2398 /* A small helper for setting fixed-point optab libfuncs. */
2399
2400 static void
2401 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2402 const char *funcname, const char *modename,
2403 int num_suffix)
2404 {
2405 char buffer[50];
2406
2407 if (num_suffix == 0)
2408 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2409 else
2410 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2411
2412 set_optab_libfunc (optable, mode, buffer);
2413 }
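/* For instance, arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
   registers the libcall name "__gnu_addsq3" for SQmode addition, while a
   num_suffix of 0 would drop the trailing digit and give "__gnu_addsq".  */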
2414
2415 static void
2416 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2417 machine_mode from, const char *funcname,
2418 const char *toname, const char *fromname)
2419 {
2420 char buffer[50];
2421 const char *maybe_suffix_2 = "";
2422
2423 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2424 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2425 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2426 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2427 maybe_suffix_2 = "2";
2428
2429 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2430 maybe_suffix_2);
2431
2432 set_conv_libfunc (optable, to, from, buffer);
2433 }
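/* For instance, converting from SQmode to DQmode (both signed fract modes)
   follows the fixed-bit.h convention and yields "__gnu_fractsqdq2", whereas
   converting from SQmode to SImode involves a non-fixed-point mode, so the
   "2" suffix is dropped and the name is "__gnu_fractsqsi".  */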
2434
2435 /* Set up library functions unique to ARM. */
2436
2437 static void
2438 arm_init_libfuncs (void)
2439 {
2440 /* For Linux, we have access to kernel support for atomic operations. */
2441 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2442 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2443
2444 /* There are no special library functions unless we are using the
2445 ARM BPABI. */
2446 if (!TARGET_BPABI)
2447 return;
2448
2449 /* The functions below are described in Section 4 of the "Run-Time
2450 ABI for the ARM architecture", Version 1.0. */
2451
2452 /* Double-precision floating-point arithmetic. Table 2. */
2453 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2454 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2455 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2456 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2457 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2458
2459 /* Double-precision comparisons. Table 3. */
2460 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2461 set_optab_libfunc (ne_optab, DFmode, NULL);
2462 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2463 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2464 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2465 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2466 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2467
2468 /* Single-precision floating-point arithmetic. Table 4. */
2469 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2470 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2471 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2472 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2473 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2474
2475 /* Single-precision comparisons. Table 5. */
2476 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2477 set_optab_libfunc (ne_optab, SFmode, NULL);
2478 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2479 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2480 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2481 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2482 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2483
2484 /* Floating-point to integer conversions. Table 6. */
2485 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2486 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2487 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2488 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2489 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2490 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2491 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2492 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2493
2494 /* Conversions between floating types. Table 7. */
2495 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2496 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2497
2498 /* Integer to floating-point conversions. Table 8. */
2499 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2500 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2501 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2502 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2503 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2504 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2505 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2506 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2507
2508 /* Long long. Table 9. */
2509 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2510 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2511 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2512 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2513 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2514 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2515 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2516 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2517
2518 /* Integer (32/32->32) division. \S 4.3.1. */
2519 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2520 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2521
2522 /* The divmod functions are designed so that they can be used for
2523 plain division, even though they return both the quotient and the
2524 remainder. The quotient is returned in the usual location (i.e.,
2525 r0 for SImode, {r0, r1} for DImode), just as would be expected
2526 for an ordinary division routine. Because the AAPCS calling
2527 conventions specify that all of { r0, r1, r2, r3 } are
2528 call-clobbered registers, there is no need to tell the compiler
2529 explicitly that those registers are clobbered by these
2530 routines. */
2531 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2532 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
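/* For example, __aeabi_ldivmod returns the 64-bit quotient in {r0, r1} and
   the remainder in {r2, r3}; when it is used for plain DImode division via
   the entries just above, the remainder in {r2, r3} is simply ignored, and
   since those registers are call-clobbered anyway no extra clobber needs to
   be described to the compiler.  */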
2533
2534 /* For SImode division the ABI provides div-without-mod routines,
2535 which are faster. */
2536 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2537 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2538
2539 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2540 divmod libcalls instead. */
2541 set_optab_libfunc (smod_optab, DImode, NULL);
2542 set_optab_libfunc (umod_optab, DImode, NULL);
2543 set_optab_libfunc (smod_optab, SImode, NULL);
2544 set_optab_libfunc (umod_optab, SImode, NULL);
2545
2546 /* Half-precision float operations. The compiler handles all operations
2547 with NULL libfuncs by converting to SFmode. */
2548 switch (arm_fp16_format)
2549 {
2550 case ARM_FP16_FORMAT_IEEE:
2551 case ARM_FP16_FORMAT_ALTERNATIVE:
2552
2553 /* Conversions. */
2554 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2555 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2556 ? "__gnu_f2h_ieee"
2557 : "__gnu_f2h_alternative"));
2558 set_conv_libfunc (sext_optab, SFmode, HFmode,
2559 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2560 ? "__gnu_h2f_ieee"
2561 : "__gnu_h2f_alternative"));
2562
2563 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2564 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2565 ? "__gnu_d2h_ieee"
2566 : "__gnu_d2h_alternative"));
2567
2568 /* Arithmetic. */
2569 set_optab_libfunc (add_optab, HFmode, NULL);
2570 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2571 set_optab_libfunc (smul_optab, HFmode, NULL);
2572 set_optab_libfunc (neg_optab, HFmode, NULL);
2573 set_optab_libfunc (sub_optab, HFmode, NULL);
2574
2575 /* Comparisons. */
2576 set_optab_libfunc (eq_optab, HFmode, NULL);
2577 set_optab_libfunc (ne_optab, HFmode, NULL);
2578 set_optab_libfunc (lt_optab, HFmode, NULL);
2579 set_optab_libfunc (le_optab, HFmode, NULL);
2580 set_optab_libfunc (ge_optab, HFmode, NULL);
2581 set_optab_libfunc (gt_optab, HFmode, NULL);
2582 set_optab_libfunc (unord_optab, HFmode, NULL);
2583 break;
2584
2585 default:
2586 break;
2587 }
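/* Illustrative sketch (assuming -mfp16-format=ieee and no half-precision
   arithmetic or conversion support in hardware): because the HFmode
   arithmetic and comparison libfuncs above are left NULL, an expression such
   as

     __fp16 a, b, c;
     c = a + b;

   is evaluated roughly as

     c = __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b));

   i.e. the operands are widened to SFmode, the operation is done there, and
   the result is narrowed back.  */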
2588
2589 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2590 {
2591 const arm_fixed_mode_set fixed_arith_modes[] =
2592 {
2593 { QQmode, "qq" },
2594 { UQQmode, "uqq" },
2595 { HQmode, "hq" },
2596 { UHQmode, "uhq" },
2597 { SQmode, "sq" },
2598 { USQmode, "usq" },
2599 { DQmode, "dq" },
2600 { UDQmode, "udq" },
2601 { TQmode, "tq" },
2602 { UTQmode, "utq" },
2603 { HAmode, "ha" },
2604 { UHAmode, "uha" },
2605 { SAmode, "sa" },
2606 { USAmode, "usa" },
2607 { DAmode, "da" },
2608 { UDAmode, "uda" },
2609 { TAmode, "ta" },
2610 { UTAmode, "uta" }
2611 };
2612 const arm_fixed_mode_set fixed_conv_modes[] =
2613 {
2614 { QQmode, "qq" },
2615 { UQQmode, "uqq" },
2616 { HQmode, "hq" },
2617 { UHQmode, "uhq" },
2618 { SQmode, "sq" },
2619 { USQmode, "usq" },
2620 { DQmode, "dq" },
2621 { UDQmode, "udq" },
2622 { TQmode, "tq" },
2623 { UTQmode, "utq" },
2624 { HAmode, "ha" },
2625 { UHAmode, "uha" },
2626 { SAmode, "sa" },
2627 { USAmode, "usa" },
2628 { DAmode, "da" },
2629 { UDAmode, "uda" },
2630 { TAmode, "ta" },
2631 { UTAmode, "uta" },
2632 { QImode, "qi" },
2633 { HImode, "hi" },
2634 { SImode, "si" },
2635 { DImode, "di" },
2636 { TImode, "ti" },
2637 { SFmode, "sf" },
2638 { DFmode, "df" }
2639 };
2640 unsigned int i, j;
2641
2642 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2643 {
2644 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2645 "add", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2647 "ssadd", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2649 "usadd", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2651 "sub", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2653 "sssub", fixed_arith_modes[i].name, 3);
2654 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2655 "ussub", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2657 "mul", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2659 "ssmul", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2661 "usmul", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2663 "div", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2665 "udiv", fixed_arith_modes[i].name, 3);
2666 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2667 "ssdiv", fixed_arith_modes[i].name, 3);
2668 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2669 "usdiv", fixed_arith_modes[i].name, 3);
2670 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2671 "neg", fixed_arith_modes[i].name, 2);
2672 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2673 "ssneg", fixed_arith_modes[i].name, 2);
2674 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2675 "usneg", fixed_arith_modes[i].name, 2);
2676 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2677 "ashl", fixed_arith_modes[i].name, 3);
2678 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2679 "ashr", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2681 "lshr", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2683 "ssashl", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2685 "usashl", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2687 "cmp", fixed_arith_modes[i].name, 2);
2688 }
2689
2690 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2691 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2692 {
2693 if (i == j
2694 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2695 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2696 continue;
2697
2698 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "fract",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2702 arm_set_fixed_conv_libfunc (satfract_optab,
2703 fixed_conv_modes[i].mode,
2704 fixed_conv_modes[j].mode, "satfract",
2705 fixed_conv_modes[i].name,
2706 fixed_conv_modes[j].name);
2707 arm_set_fixed_conv_libfunc (fractuns_optab,
2708 fixed_conv_modes[i].mode,
2709 fixed_conv_modes[j].mode, "fractuns",
2710 fixed_conv_modes[i].name,
2711 fixed_conv_modes[j].name);
2712 arm_set_fixed_conv_libfunc (satfractuns_optab,
2713 fixed_conv_modes[i].mode,
2714 fixed_conv_modes[j].mode, "satfractuns",
2715 fixed_conv_modes[i].name,
2716 fixed_conv_modes[j].name);
2717 }
2718 }
2719
2720 if (TARGET_AAPCS_BASED)
2721 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2722 }
2723
2724 /* On AAPCS systems, this is the "struct __va_list". */
2725 static GTY(()) tree va_list_type;
2726
2727 /* Return the type to use as __builtin_va_list. */
2728 static tree
2729 arm_build_builtin_va_list (void)
2730 {
2731 tree va_list_name;
2732 tree ap_field;
2733
2734 if (!TARGET_AAPCS_BASED)
2735 return std_build_builtin_va_list ();
2736
2737 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2738 defined as:
2739
2740 struct __va_list
2741 {
2742 void *__ap;
2743 };
2744
2745 The C Library ABI further reinforces this definition in \S
2746 4.1.
2747
2748 We must follow this definition exactly. The structure tag
2749 name is visible in C++ mangled names, and thus forms a part
2750 of the ABI. The field name may be used by people who
2751 #include <stdarg.h>. */
2752 /* Create the type. */
2753 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2754 /* Give it the required name. */
2755 va_list_name = build_decl (BUILTINS_LOCATION,
2756 TYPE_DECL,
2757 get_identifier ("__va_list"),
2758 va_list_type);
2759 DECL_ARTIFICIAL (va_list_name) = 1;
2760 TYPE_NAME (va_list_type) = va_list_name;
2761 TYPE_STUB_DECL (va_list_type) = va_list_name;
2762 /* Create the __ap field. */
2763 ap_field = build_decl (BUILTINS_LOCATION,
2764 FIELD_DECL,
2765 get_identifier ("__ap"),
2766 ptr_type_node);
2767 DECL_ARTIFICIAL (ap_field) = 1;
2768 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2769 TYPE_FIELDS (va_list_type) = ap_field;
2770 /* Compute its layout. */
2771 layout_type (va_list_type);
2772
2773 return va_list_type;
2774 }
2775
2776 /* Return an expression of type "void *" pointing to the next
2777 available argument in a variable-argument list. VALIST is the
2778 user-level va_list object, of type __builtin_va_list. */
2779 static tree
2780 arm_extract_valist_ptr (tree valist)
2781 {
2782 if (TREE_TYPE (valist) == error_mark_node)
2783 return error_mark_node;
2784
2785 /* On an AAPCS target, the pointer is stored within "struct
2786 va_list". */
2787 if (TARGET_AAPCS_BASED)
2788 {
2789 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2790 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2791 valist, ap_field, NULL_TREE);
2792 }
2793
2794 return valist;
2795 }
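/* For example, with an AAPCS 'va_list ap;' (really 'struct __va_list ap;'),
   the tree returned here is effectively 'ap.__ap', i.e. the bare void *
   cursor that the generic std_* vararg routines below operate on; on
   non-AAPCS targets 'ap' is already that pointer and is returned
   unchanged.  */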
2796
2797 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2798 static void
2799 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2800 {
2801 valist = arm_extract_valist_ptr (valist);
2802 std_expand_builtin_va_start (valist, nextarg);
2803 }
2804
2805 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2806 static tree
2807 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2808 gimple_seq *post_p)
2809 {
2810 valist = arm_extract_valist_ptr (valist);
2811 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2812 }
2813
2814 /* Check any incompatible options that the user has specified. */
2815 static void
2816 arm_option_check_internal (struct gcc_options *opts)
2817 {
2818 int flags = opts->x_target_flags;
2819
2820 /* iWMMXt and NEON are incompatible. */
2821 if (TARGET_IWMMXT
2822 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2823 error ("iWMMXt and NEON are incompatible");
2824
2825 /* Make sure that the processor choice does not conflict with any of the
2826 other command line choices. */
2827 if (TARGET_ARM_P (flags)
2828 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2829 error ("target CPU does not support ARM mode");
2830
2831 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2832 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2833 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2834
2835 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2836 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2837
2838 /* If this target is normally configured to use APCS frames, warn if they
2839 are turned off and debugging is turned on. */
2840 if (TARGET_ARM_P (flags)
2841 && write_symbols != NO_DEBUG
2842 && !TARGET_APCS_FRAME
2843 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2844 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2845
2846 /* iWMMXt unsupported under Thumb mode. */
2847 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2848 error ("iWMMXt unsupported under Thumb mode");
2849
2850 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2851 error ("can not use -mtp=cp15 with 16-bit Thumb");
2852
2853 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2854 {
2855 error ("RTP PIC is incompatible with Thumb");
2856 flag_pic = 0;
2857 }
2858
2859 /* We only support -mslow-flash-data on armv7-m targets. */
2860 if (target_slow_flash_data
2861 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2862 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2863 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2864
2865 /* We only support pure-code on Thumb-2 M-profile targets. */
2866 if (target_pure_code
2867 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2868 error ("-mpure-code only supports non-pic code on armv7-m targets");
2869
2870 }
2871
2872 /* Recompute the global settings depending on target attribute options. */
2873
2874 static void
2875 arm_option_params_internal (void)
2876 {
2877 /* If we are not using the default (ARM mode) section anchor offset
2878 ranges, then set the correct ranges now. */
2879 if (TARGET_THUMB1)
2880 {
2881 /* Thumb-1 LDR instructions cannot have negative offsets.
2882 Permissible positive offset ranges are 5-bit (for byte loads),
2883 6-bit (for halfword loads), or 7-bit (for word loads).
2884 Empirical results suggest a 7-bit anchor range gives the best
2885 overall code size. */
2886 targetm.min_anchor_offset = 0;
2887 targetm.max_anchor_offset = 127;
2888 }
2889 else if (TARGET_THUMB2)
2890 {
2891 /* The minimum is set such that the total size of the block
2892 for a particular anchor is 248 + 1 + 4095 bytes, which is
2893 divisible by eight, ensuring natural spacing of anchors. */
2894 targetm.min_anchor_offset = -248;
2895 targetm.max_anchor_offset = 4095;
2896 }
2897 else
2898 {
2899 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2900 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2901 }
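/* A quick check of the Thumb-2 figures above: the block reachable around an
   anchor spans offsets -248 .. +4095, i.e. 248 + 1 + 4095 = 4344 bytes, and
   4344 = 8 * 543, so consecutive anchors fall on natural 8-byte-aligned
   boundaries.  */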
2902
2903 if (optimize_size)
2904 {
2905 /* If optimizing for size, bump the number of instructions that we
2906 are prepared to conditionally execute (even on a StrongARM). */
2907 max_insns_skipped = 6;
2908
2909 /* For THUMB2, we limit the conditional sequence to one IT block. */
2910 if (TARGET_THUMB2)
2911 max_insns_skipped = arm_restrict_it ? 1 : 4;
2912 }
2913 else
2914 /* When -mrestrict-it is in use, tone down the if-conversion. */
2915 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2916 ? 1 : current_tune->max_insns_skipped;
2917 }
2918
2919 /* True if -mflip-thumb should next add an attribute for the default
2920 mode, false if it should next add an attribute for the opposite mode. */
2921 static GTY(()) bool thumb_flipper;
2922
2923 /* Options after initial target override. */
2924 static GTY(()) tree init_optimize;
2925
2926 static void
2927 arm_override_options_after_change_1 (struct gcc_options *opts)
2928 {
2929 if (opts->x_align_functions <= 0)
2930 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2931 && opts->x_optimize_size ? 2 : 4;
2932 }
2933
2934 /* Implement targetm.override_options_after_change. */
2935
2936 static void
2937 arm_override_options_after_change (void)
2938 {
2939 arm_configure_build_target (&arm_active_target,
2940 TREE_TARGET_OPTION (target_option_default_node),
2941 &global_options_set, false);
2942
2943 arm_override_options_after_change_1 (&global_options);
2944 }
2945
2946 static void
2947 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2948 {
2949 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2950 false);
2951 }
2952
2953 /* Reset options between modes that the user has specified. */
2954 static void
2955 arm_option_override_internal (struct gcc_options *opts,
2956 struct gcc_options *opts_set)
2957 {
2958 arm_override_options_after_change_1 (opts);
2959
2960 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2961 {
2962 /* The default is to enable interworking, so this warning message would
2963 be confusing to users who have just compiled with, e.g., -march=armv3. */
2964 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2965 opts->x_target_flags &= ~MASK_INTERWORK;
2966 }
2967
2968 if (TARGET_THUMB_P (opts->x_target_flags)
2969 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2970 {
2971 warning (0, "target CPU does not support THUMB instructions");
2972 opts->x_target_flags &= ~MASK_THUMB;
2973 }
2974
2975 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2976 {
2977 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2978 opts->x_target_flags &= ~MASK_APCS_FRAME;
2979 }
2980
2981 /* Callee super interworking implies thumb interworking. Adding
2982 this to the flags here simplifies the logic elsewhere. */
2983 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2984 opts->x_target_flags |= MASK_INTERWORK;
2985
2986 /* Need to remember initial values so combinations of options like
2987 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2988 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2989
2990 if (! opts_set->x_arm_restrict_it)
2991 opts->x_arm_restrict_it = arm_arch8;
2992
2993 /* ARM execution state and M profile don't have [restrict] IT. */
2994 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2995 opts->x_arm_restrict_it = 0;
2996
2997 /* Enable -munaligned-access by default for
2998 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2999 i.e. Thumb2 and ARM state only.
3000 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3001 - ARMv8 architecture-based processors.
3002
3003 Disable -munaligned-access by default for
3004 - all pre-ARMv6 architecture-based processors
3005 - ARMv6-M architecture-based processors
3006 - ARMv8-M Baseline processors. */
3007
3008 if (! opts_set->x_unaligned_access)
3009 {
3010 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3011 && arm_arch6 && (arm_arch_notm || arm_arch7));
3012 }
3013 else if (opts->x_unaligned_access == 1
3014 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3015 {
3016 warning (0, "target CPU does not support unaligned accesses");
3017 opts->x_unaligned_access = 0;
3018 }
3019
3020 /* Don't warn since it's on by default in -O2. */
3021 if (TARGET_THUMB1_P (opts->x_target_flags))
3022 opts->x_flag_schedule_insns = 0;
3023 else
3024 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3025
3026 /* Disable shrink-wrap when optimizing function for size, since it tends to
3027 generate additional returns. */
3028 if (optimize_function_for_size_p (cfun)
3029 && TARGET_THUMB2_P (opts->x_target_flags))
3030 opts->x_flag_shrink_wrap = false;
3031 else
3032 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3033
3034 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3035 - epilogue_insns - does not accurately model the corresponding insns
3036 emitted in the asm file. In particular, see the comment in thumb_exit
3037 'Find out how many of the (return) argument registers we can corrupt'.
3038 As a consequence, the epilogue may clobber registers without fipa-ra
3039 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3040 TODO: Accurately model clobbers for epilogue_insns and reenable
3041 fipa-ra. */
3042 if (TARGET_THUMB1_P (opts->x_target_flags))
3043 opts->x_flag_ipa_ra = 0;
3044 else
3045 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3046
3047 /* Thumb2 inline assembly code should always use unified syntax.
3048 This will apply to ARM and Thumb1 eventually. */
3049 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3050
3051 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3052 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3053 #endif
3054 }
3055
3056 /* Convert a static initializer array of feature bits to sbitmap
3057 representation. */
3058 static void
3059 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3060 {
3061 bitmap_clear (isa);
3062 while (*isa_bits != isa_nobit)
3063 bitmap_set_bit (isa, *(isa_bits++));
3064 }
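/* Minimal usage sketch (hypothetical bit list): to build an sbitmap holding
   just the Thumb and 32-bit execution-state capabilities one could write

     static const enum isa_feature bits[] =
       { isa_bit_thumb, isa_bit_mode32, isa_nobit };
     auto_sbitmap isa (isa_num_bits);
     arm_initialize_isa (isa, bits);

   The isa_nobit terminator is required, exactly as in the ARM_CORE and
   ARM_ARCH initializer lists above.  */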
3065
3066 static sbitmap isa_all_fpubits;
3067 static sbitmap isa_quirkbits;
3068
3069 /* Configure a build target TARGET from the user-specified options OPTS and
3070 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3071 architecture have been specified, but the two are not identical. */
3072 void
3073 arm_configure_build_target (struct arm_build_target *target,
3074 struct cl_target_option *opts,
3075 struct gcc_options *opts_set,
3076 bool warn_compatible)
3077 {
3078 const struct processors *arm_selected_tune = NULL;
3079 const struct processors *arm_selected_arch = NULL;
3080 const struct processors *arm_selected_cpu = NULL;
3081 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3082
3083 bitmap_clear (target->isa);
3084 target->core_name = NULL;
3085 target->arch_name = NULL;
3086
3087 if (opts_set->x_arm_arch_option)
3088 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3089
3090 if (opts_set->x_arm_cpu_option)
3091 {
3092 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3093 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3094 }
3095
3096 if (opts_set->x_arm_tune_option)
3097 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3098
3099 if (arm_selected_arch)
3100 {
3101 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3102
3103 if (arm_selected_cpu)
3104 {
3105 auto_sbitmap cpu_isa (isa_num_bits);
3106
3107 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3108 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3109 /* Ignore any bits that are quirk bits. */
3110 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3111 /* Ignore (for now) any bits that might be set by -mfpu. */
3112 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3113
3114 if (!bitmap_empty_p (cpu_isa))
3115 {
3116 if (warn_compatible)
3117 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3118 arm_selected_cpu->name, arm_selected_arch->name);
3119 /* -march wins for code generation.
3120 -mcpu wins for default tuning. */
3121 if (!arm_selected_tune)
3122 arm_selected_tune = arm_selected_cpu;
3123
3124 arm_selected_cpu = arm_selected_arch;
3125 }
3126 else
3127 {
3128 /* Architecture and CPU are essentially the same.
3129 Prefer the CPU setting. */
3130 arm_selected_arch = NULL;
3131 }
3132
3133 target->core_name = arm_selected_cpu->name;
3134 }
3135 else
3136 {
3137 /* Pick a CPU based on the architecture. */
3138 arm_selected_cpu = arm_selected_arch;
3139 target->arch_name = arm_selected_arch->name;
3140 /* Note: target->core_name is left unset in this path. */
3141 }
3142 }
3143 else if (arm_selected_cpu)
3144 {
3145 target->core_name = arm_selected_cpu->name;
3146 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3147 }
3148 /* If the user did not specify a processor, choose one for them. */
3149 else
3150 {
3151 const struct processors * sel;
3152 auto_sbitmap sought_isa (isa_num_bits);
3153 bitmap_clear (sought_isa);
3154 auto_sbitmap default_isa (isa_num_bits);
3155
3156 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3157 gcc_assert (arm_selected_cpu->name);
3158
3159 /* RWE: All of the selection logic below (to the end of this
3160 'if' clause) looks somewhat suspect. It appears to be mostly
3161 there to support forcing thumb support when the default CPU
3162 does not have thumb (somewhat dubious in terms of what the
3163 user might be expecting). I think it should be removed once
3164 support for the pre-thumb era cores is removed. */
3165 sel = arm_selected_cpu;
3166 arm_initialize_isa (default_isa, sel->isa_bits);
3167
3168 /* Now check to see if the user has specified any command line
3169 switches that require certain abilities from the cpu. */
3170
3171 if (TARGET_INTERWORK || TARGET_THUMB)
3172 {
3173 bitmap_set_bit (sought_isa, isa_bit_thumb);
3174 bitmap_set_bit (sought_isa, isa_bit_mode32);
3175
3176 /* There are no ARM processors that support both APCS-26 and
3177 interworking. Therefore we forcibly remove MODE26
3178 from the isa features here (if it was set), so that the
3179 search below will always be able to find a compatible
3180 processor. */
3181 bitmap_clear_bit (default_isa, isa_bit_mode26);
3182 }
3183
3184 /* If there are such requirements and the default CPU does not
3185 satisfy them, we need to run over the complete list of
3186 cores looking for one that is satisfactory. */
3187 if (!bitmap_empty_p (sought_isa)
3188 && !bitmap_subset_p (sought_isa, default_isa))
3189 {
3190 auto_sbitmap candidate_isa (isa_num_bits);
3191 /* We're only interested in a CPU with at least the
3192 capabilities of the default CPU and the required
3193 additional features. */
3194 bitmap_ior (default_isa, default_isa, sought_isa);
3195
3196 /* Try to locate a CPU type that supports all of the abilities
3197 of the default CPU, plus the extra abilities requested by
3198 the user. */
3199 for (sel = all_cores; sel->name != NULL; sel++)
3200 {
3201 arm_initialize_isa (candidate_isa, sel->isa_bits);
3202 /* An exact match? */
3203 if (bitmap_equal_p (default_isa, candidate_isa))
3204 break;
3205 }
3206
3207 if (sel->name == NULL)
3208 {
3209 unsigned current_bit_count = isa_num_bits;
3210 const struct processors * best_fit = NULL;
3211
3212 /* Ideally we would like to issue an error message here
3213 saying that it was not possible to find a CPU compatible
3214 with the default CPU, but which also supports the command
3215 line options specified by the programmer, and so they
3216 ought to use the -mcpu=<name> command line option to
3217 override the default CPU type.
3218
3219 If we cannot find a CPU that has exactly the
3220 characteristics of the default CPU and the given
3221 command line options we scan the array again looking
3222 for a best match. The best match must have at least
3223 the capabilities of the perfect match. */
3224 for (sel = all_cores; sel->name != NULL; sel++)
3225 {
3226 arm_initialize_isa (candidate_isa, sel->isa_bits);
3227
3228 if (bitmap_subset_p (default_isa, candidate_isa))
3229 {
3230 unsigned count;
3231
3232 bitmap_and_compl (candidate_isa, candidate_isa,
3233 default_isa);
3234 count = bitmap_popcount (candidate_isa);
3235
3236 if (count < current_bit_count)
3237 {
3238 best_fit = sel;
3239 current_bit_count = count;
3240 }
3241 }
3242 }
3243
3244 gcc_assert (best_fit);
3245 sel = best_fit;
3246 }
3247 arm_selected_cpu = sel;
3248 }
3249
3250 /* Now we know the CPU, we can finally initialize the target
3251 structure. */
3252 target->core_name = arm_selected_cpu->name;
3253 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3254 }
3255
3256 gcc_assert (arm_selected_cpu);
3257
3258 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3259 {
3260 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3261 auto_sbitmap fpu_bits (isa_num_bits);
3262
3263 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3264 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3265 bitmap_ior (target->isa, target->isa, fpu_bits);
3266 }
3267 else if (target->core_name == NULL)
3268 /* To support this we need to be able to parse FPU feature options
3269 from the architecture string. */
3270 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3271
3272 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3273 if (!arm_selected_tune)
3274 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3275
3276 /* Finish initializing the target structure. */
3277 target->arch_pp_name = arm_selected_cpu->arch;
3278 target->base_arch = arm_selected_cpu->base_arch;
3279 target->arch_core = arm_selected_cpu->core;
3280
3281 target->tune_flags = arm_selected_tune->tune_flags;
3282 target->tune = arm_selected_tune->tune;
3283 target->tune_core = arm_selected_tune->core;
3284 }
3285
3286 /* Fix up any incompatible options that the user has specified. */
3287 static void
3288 arm_option_override (void)
3289 {
3290 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3291 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3292
3293 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3294 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3295
3296 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3297 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3298
3299 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3300
3301 if (!global_options_set.x_arm_fpu_index)
3302 {
3303 const char *target_fpu_name;
3304 bool ok;
3305 int fpu_index;
3306
3307 #ifdef FPUTYPE_DEFAULT
3308 target_fpu_name = FPUTYPE_DEFAULT;
3309 #else
3310 target_fpu_name = "vfp";
3311 #endif
3312
3313 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3314 CL_TARGET);
3315 gcc_assert (ok);
3316 arm_fpu_index = (enum fpu_type) fpu_index;
3317 }
3318
3319 /* Create the default target_options structure. We need this early
3320 to configure the overall build target. */
3321 target_option_default_node = target_option_current_node
3322 = build_target_option_node (&global_options);
3323
3324 arm_configure_build_target (&arm_active_target,
3325 TREE_TARGET_OPTION (target_option_default_node),
3326 &global_options_set, true);
3327
3328 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3329 SUBTARGET_OVERRIDE_OPTIONS;
3330 #endif
3331
3332 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3333 arm_base_arch = arm_active_target.base_arch;
3334
3335 arm_tune = arm_active_target.tune_core;
3336 tune_flags = arm_active_target.tune_flags;
3337 current_tune = arm_active_target.tune;
3338
3339 /* TBD: Dwarf info for apcs frame is not handled yet. */
3340 if (TARGET_APCS_FRAME)
3341 flag_shrink_wrap = false;
3342
3343 /* BPABI targets use linker tricks to allow interworking on cores
3344 without thumb support. */
3345 if (TARGET_INTERWORK
3346 && !TARGET_BPABI
3347 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3348 {
3349 warning (0, "target CPU does not support interworking");
3350 target_flags &= ~MASK_INTERWORK;
3351 }
3352
3353 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3354 {
3355 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3356 target_flags |= MASK_APCS_FRAME;
3357 }
3358
3359 if (TARGET_POKE_FUNCTION_NAME)
3360 target_flags |= MASK_APCS_FRAME;
3361
3362 if (TARGET_APCS_REENT && flag_pic)
3363 error ("-fpic and -mapcs-reent are incompatible");
3364
3365 if (TARGET_APCS_REENT)
3366 warning (0, "APCS reentrant code not supported. Ignored");
3367
3368 /* Initialize boolean versions of the architectural flags, for use
3369 in the arm.md file. */
3370 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3371 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3372 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3373 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3374 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3375 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3376 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3377 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3378 arm_arch6m = arm_arch6 && !arm_arch_notm;
3379 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3380 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3381 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3382 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3383 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3384 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3385 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3386 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3387 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3388 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3389 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3390 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3391 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3392 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3393 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3394 arm_arch7ve
3395 = (arm_arch6k && arm_arch7 && arm_arch_thumb_hwdiv && arm_arch_arm_hwdiv);
3396 if (arm_fp16_inst)
3397 {
3398 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3399 error ("selected fp16 options are incompatible.");
3400 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3401 }
3402
3403
3404 /* Set up some tuning parameters. */
3405 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3406 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3407 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3408 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3409 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3410 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3411
3412 /* And finally, set up some quirks. */
3413 arm_arch_no_volatile_ce
3414 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3415 arm_arch6kz
3416 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3417
3418 /* V5 code we generate is completely interworking capable, so we turn off
3419 TARGET_INTERWORK here to avoid many tests later on. */
3420
3421 /* XXX However, we must pass the right pre-processor defines to CPP
3422 or GLD can get confused. This is a hack. */
3423 if (TARGET_INTERWORK)
3424 arm_cpp_interwork = 1;
3425
3426 if (arm_arch5)
3427 target_flags &= ~MASK_INTERWORK;
3428
3429 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3430 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3431
3432 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3433 error ("iwmmxt abi requires an iwmmxt capable cpu");
3434
3435 /* If soft-float is specified then don't use FPU. */
3436 if (TARGET_SOFT_FLOAT)
3437 arm_fpu_attr = FPU_NONE;
3438 else
3439 arm_fpu_attr = FPU_VFP;
3440
3441 if (TARGET_AAPCS_BASED)
3442 {
3443 if (TARGET_CALLER_INTERWORKING)
3444 error ("AAPCS does not support -mcaller-super-interworking");
3445 else
3446 if (TARGET_CALLEE_INTERWORKING)
3447 error ("AAPCS does not support -mcallee-super-interworking");
3448 }
3449
3450 /* __fp16 support currently assumes the core has ldrh. */
3451 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3452 sorry ("__fp16 and no ldrh");
3453
3454 if (TARGET_AAPCS_BASED)
3455 {
3456 if (arm_abi == ARM_ABI_IWMMXT)
3457 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3458 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3459 && TARGET_HARD_FLOAT)
3460 {
3461 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3462 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3463 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3464 }
3465 else
3466 arm_pcs_default = ARM_PCS_AAPCS;
3467 }
3468 else
3469 {
3470 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3471 sorry ("-mfloat-abi=hard and VFP");
3472
3473 if (arm_abi == ARM_ABI_APCS)
3474 arm_pcs_default = ARM_PCS_APCS;
3475 else
3476 arm_pcs_default = ARM_PCS_ATPCS;
3477 }
3478
3479 /* For arm2/3 there is no need to do any scheduling if we are doing
3480 software floating-point. */
3481 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3482 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3483
3484 /* Use the cp15 method if it is available. */
3485 if (target_thread_pointer == TP_AUTO)
3486 {
3487 if (arm_arch6k && !TARGET_THUMB1)
3488 target_thread_pointer = TP_CP15;
3489 else
3490 target_thread_pointer = TP_SOFT;
3491 }
3492
3493 /* Override the default structure alignment for AAPCS ABI. */
3494 if (!global_options_set.x_arm_structure_size_boundary)
3495 {
3496 if (TARGET_AAPCS_BASED)
3497 arm_structure_size_boundary = 8;
3498 }
3499 else
3500 {
3501 if (arm_structure_size_boundary != 8
3502 && arm_structure_size_boundary != 32
3503 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3504 {
3505 if (ARM_DOUBLEWORD_ALIGN)
3506 warning (0,
3507 "structure size boundary can only be set to 8, 32 or 64");
3508 else
3509 warning (0, "structure size boundary can only be set to 8 or 32");
3510 arm_structure_size_boundary
3511 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3512 }
3513 }
3514
3515 if (TARGET_VXWORKS_RTP)
3516 {
3517 if (!global_options_set.x_arm_pic_data_is_text_relative)
3518 arm_pic_data_is_text_relative = 0;
3519 }
3520 else if (flag_pic
3521 && !arm_pic_data_is_text_relative
3522 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3523 /* When text & data segments don't have a fixed displacement, the
3524 intended use is with a single, read only, pic base register.
3525 Unless the user explicitly requested not to do that, set
3526 it. */
3527 target_flags |= MASK_SINGLE_PIC_BASE;
3528
3529 /* If stack checking is disabled, we can use r10 as the PIC register,
3530 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3531 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3532 {
3533 if (TARGET_VXWORKS_RTP)
3534 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3535 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3536 }
3537
3538 if (flag_pic && TARGET_VXWORKS_RTP)
3539 arm_pic_register = 9;
3540
3541 if (arm_pic_register_string != NULL)
3542 {
3543 int pic_register = decode_reg_name (arm_pic_register_string);
3544
3545 if (!flag_pic)
3546 warning (0, "-mpic-register= is useless without -fpic");
3547
3548 /* Prevent the user from choosing an obviously stupid PIC register. */
3549 else if (pic_register < 0 || call_used_regs[pic_register]
3550 || pic_register == HARD_FRAME_POINTER_REGNUM
3551 || pic_register == STACK_POINTER_REGNUM
3552 || pic_register >= PC_REGNUM
3553 || (TARGET_VXWORKS_RTP
3554 && (unsigned int) pic_register != arm_pic_register))
3555 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3556 else
3557 arm_pic_register = pic_register;
3558 }
3559
3560 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3561 if (fix_cm3_ldrd == 2)
3562 {
3563 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3564 fix_cm3_ldrd = 1;
3565 else
3566 fix_cm3_ldrd = 0;
3567 }
3568
3569 /* Hot/Cold partitioning is not currently supported, since we can't
3570 handle literal pool placement in that case. */
3571 if (flag_reorder_blocks_and_partition)
3572 {
3573 inform (input_location,
3574 "-freorder-blocks-and-partition not supported on this architecture");
3575 flag_reorder_blocks_and_partition = 0;
3576 flag_reorder_blocks = 1;
3577 }
3578
3579 if (flag_pic)
3580 /* Hoisting PIC address calculations more aggressively provides a small,
3581 but measurable, size reduction for PIC code. Therefore, we decrease
3582 the bar for unrestricted expression hoisting to the cost of PIC address
3583 calculation, which is 2 instructions. */
3584 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3585 global_options.x_param_values,
3586 global_options_set.x_param_values);
3587
3588 /* ARM EABI defaults to strict volatile bitfields. */
3589 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3590 && abi_version_at_least(2))
3591 flag_strict_volatile_bitfields = 1;
3592
3593 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3594 for which we have deemed it beneficial (signified by setting
3595 prefetch.num_slots to 1 or more). */
3596 if (flag_prefetch_loop_arrays < 0
3597 && HAVE_prefetch
3598 && optimize >= 3
3599 && current_tune->prefetch.num_slots > 0)
3600 flag_prefetch_loop_arrays = 1;
3601
3602 /* Set up parameters to be used in prefetching algorithm. Do not
3603 override the defaults unless we are tuning for a core we have
3604 researched values for. */
3605 if (current_tune->prefetch.num_slots > 0)
3606 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3607 current_tune->prefetch.num_slots,
3608 global_options.x_param_values,
3609 global_options_set.x_param_values);
3610 if (current_tune->prefetch.l1_cache_line_size >= 0)
3611 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3612 current_tune->prefetch.l1_cache_line_size,
3613 global_options.x_param_values,
3614 global_options_set.x_param_values);
3615 if (current_tune->prefetch.l1_cache_size >= 0)
3616 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3617 current_tune->prefetch.l1_cache_size,
3618 global_options.x_param_values,
3619 global_options_set.x_param_values);
3620
3621 /* Use Neon to perform 64-bits operations rather than core
3622 registers. */
3623 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3624 if (use_neon_for_64bits == 1)
3625 prefer_neon_for_64bits = true;
3626
3627 /* Use the alternative scheduling-pressure algorithm by default. */
3628 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3629 global_options.x_param_values,
3630 global_options_set.x_param_values);
3631
3632 /* Look through ready list and all of queue for instructions
3633 relevant for L2 auto-prefetcher. */
3634 int param_sched_autopref_queue_depth;
3635
3636 switch (current_tune->sched_autopref)
3637 {
3638 case tune_params::SCHED_AUTOPREF_OFF:
3639 param_sched_autopref_queue_depth = -1;
3640 break;
3641
3642 case tune_params::SCHED_AUTOPREF_RANK:
3643 param_sched_autopref_queue_depth = 0;
3644 break;
3645
3646 case tune_params::SCHED_AUTOPREF_FULL:
3647 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3648 break;
3649
3650 default:
3651 gcc_unreachable ();
3652 }
3653
3654 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3655 param_sched_autopref_queue_depth,
3656 global_options.x_param_values,
3657 global_options_set.x_param_values);
3658
3659 /* Currently, for slow flash data, we just disable literal pools. We also
3660 disable them for pure-code. */
3661 if (target_slow_flash_data || target_pure_code)
3662 arm_disable_literal_pool = true;
3663
3664 if (use_cmse && !arm_arch_cmse)
3665 error ("target CPU does not support ARMv8-M Security Extensions");
3666
3667 /* Disable scheduling fusion by default if the processor is not ARMv7,
3668 or if the tuning does not prefer ldrd/strd. */
3669 if (flag_schedule_fusion == 2
3670 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3671 flag_schedule_fusion = 0;
3672
3673 /* Need to remember initial options before they are overridden. */
3674 init_optimize = build_optimization_node (&global_options);
3675
3676 arm_option_override_internal (&global_options, &global_options_set);
3677 arm_option_check_internal (&global_options);
3678 arm_option_params_internal ();
3679
3680 /* Resynchronize the saved target options. */
3681 cl_target_option_save (TREE_TARGET_OPTION (target_option_default_node),
3682 &global_options);
3683 /* Register global variables with the garbage collector. */
3684 arm_add_gc_roots ();
3685
3686 /* Init initial mode for testing. */
3687 thumb_flipper = TARGET_THUMB;
3688 }
3689
3690 static void
3691 arm_add_gc_roots (void)
3692 {
3693 gcc_obstack_init(&minipool_obstack);
3694 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3695 }
3696 \f
3697 /* A table of known ARM exception types.
3698 For use with the interrupt function attribute. */
3699
3700 typedef struct
3701 {
3702 const char *const arg;
3703 const unsigned long return_value;
3704 }
3705 isr_attribute_arg;
3706
3707 static const isr_attribute_arg isr_attribute_args [] =
3708 {
3709 { "IRQ", ARM_FT_ISR },
3710 { "irq", ARM_FT_ISR },
3711 { "FIQ", ARM_FT_FIQ },
3712 { "fiq", ARM_FT_FIQ },
3713 { "ABORT", ARM_FT_ISR },
3714 { "abort", ARM_FT_ISR },
3715 { "ABORT", ARM_FT_ISR },
3716 { "abort", ARM_FT_ISR },
3717 { "UNDEF", ARM_FT_EXCEPTION },
3718 { "undef", ARM_FT_EXCEPTION },
3719 { "SWI", ARM_FT_EXCEPTION },
3720 { "swi", ARM_FT_EXCEPTION },
3721 { NULL, ARM_FT_NORMAL }
3722 };
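/* For example (illustrative), a handler declared as
       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
   is mapped by arm_isr_value below to ARM_FT_ISR.  */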
3723
3724 /* Returns the (interrupt) function type of the current
3725 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3726
3727 static unsigned long
3728 arm_isr_value (tree argument)
3729 {
3730 const isr_attribute_arg * ptr;
3731 const char * arg;
3732
3733 if (!arm_arch_notm)
3734 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3735
3736 /* No argument - default to IRQ. */
3737 if (argument == NULL_TREE)
3738 return ARM_FT_ISR;
3739
3740 /* Get the value of the argument. */
3741 if (TREE_VALUE (argument) == NULL_TREE
3742 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3743 return ARM_FT_UNKNOWN;
3744
3745 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3746
3747 /* Check it against the list of known arguments. */
3748 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3749 if (streq (arg, ptr->arg))
3750 return ptr->return_value;
3751
3752 /* An unrecognized interrupt type. */
3753 return ARM_FT_UNKNOWN;
3754 }
3755
3756 /* Computes the type of the current function. */
3757
3758 static unsigned long
3759 arm_compute_func_type (void)
3760 {
3761 unsigned long type = ARM_FT_UNKNOWN;
3762 tree a;
3763 tree attr;
3764
3765 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3766
3767 /* Decide if the current function is volatile. Such functions
3768 never return, and many memory cycles can be saved by not storing
3769 register values that will never be needed again. This optimization
3770 was added to speed up context switching in a kernel application. */
3771 if (optimize > 0
3772 && (TREE_NOTHROW (current_function_decl)
3773 || !(flag_unwind_tables
3774 || (flag_exceptions
3775 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3776 && TREE_THIS_VOLATILE (current_function_decl))
3777 type |= ARM_FT_VOLATILE;
3778
3779 if (cfun->static_chain_decl != NULL)
3780 type |= ARM_FT_NESTED;
3781
3782 attr = DECL_ATTRIBUTES (current_function_decl);
3783
3784 a = lookup_attribute ("naked", attr);
3785 if (a != NULL_TREE)
3786 type |= ARM_FT_NAKED;
3787
3788 a = lookup_attribute ("isr", attr);
3789 if (a == NULL_TREE)
3790 a = lookup_attribute ("interrupt", attr);
3791
3792 if (a == NULL_TREE)
3793 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3794 else
3795 type |= arm_isr_value (TREE_VALUE (a));
3796
3797 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3798 type |= ARM_FT_CMSE_ENTRY;
3799
3800 return type;
3801 }
3802
3803 /* Returns the type of the current function. */
3804
3805 unsigned long
3806 arm_current_func_type (void)
3807 {
3808 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3809 cfun->machine->func_type = arm_compute_func_type ();
3810
3811 return cfun->machine->func_type;
3812 }
3813
3814 bool
3815 arm_allocate_stack_slots_for_args (void)
3816 {
3817 /* Naked functions should not allocate stack slots for arguments. */
3818 return !IS_NAKED (arm_current_func_type ());
3819 }
3820
3821 static bool
3822 arm_warn_func_return (tree decl)
3823 {
3824 /* Naked functions are implemented entirely in assembly, including the
3825 return sequence, so suppress warnings about this. */
3826 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3827 }
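/* For example (illustrative), a function defined as
       void __attribute__ ((naked)) start (void) { __asm__ ("b\tmain"); }
   supplies its own prologue, epilogue and return sequence in inline
   assembly, so it gets neither stack slots for its arguments nor a
   missing-return warning.  */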
3828
3829 \f
3830 /* Output assembler code for a block containing the constant parts
3831 of a trampoline, leaving space for the variable parts.
3832
3833 On the ARM, (if r8 is the static chain regnum, and remembering that
3834 referencing pc adds an offset of 8) the trampoline looks like:
3835 ldr r8, [pc, #0]
3836 ldr pc, [pc]
3837 .word static chain value
3838 .word function's address
3839 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3840
3841 static void
3842 arm_asm_trampoline_template (FILE *f)
3843 {
3844 fprintf (f, "\t.syntax unified\n");
3845
3846 if (TARGET_ARM)
3847 {
3848 fprintf (f, "\t.arm\n");
3849 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3850 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3851 }
3852 else if (TARGET_THUMB2)
3853 {
3854 fprintf (f, "\t.thumb\n");
3855 /* The Thumb-2 trampoline is similar to the arm implementation.
3856 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3857 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3858 STATIC_CHAIN_REGNUM, PC_REGNUM);
3859 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3860 }
3861 else
3862 {
3863 ASM_OUTPUT_ALIGN (f, 2);
3864 fprintf (f, "\t.code\t16\n");
3865 fprintf (f, ".Ltrampoline_start:\n");
3866 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3867 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3868 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3869 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3870 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3871 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3872 }
3873 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3874 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3875 }
3876
3877 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3878
3879 static void
3880 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3881 {
3882 rtx fnaddr, mem, a_tramp;
3883
3884 emit_block_move (m_tramp, assemble_trampoline_template (),
3885 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3886
3887 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3888 emit_move_insn (mem, chain_value);
3889
3890 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3891 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3892 emit_move_insn (mem, fnaddr);
3893
3894 a_tramp = XEXP (m_tramp, 0);
3895 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3896 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3897 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3898 }
3899
3900 /* Thumb trampolines should be entered in thumb mode, so set
3901 the bottom bit of the address. */
3902
3903 static rtx
3904 arm_trampoline_adjust_address (rtx addr)
3905 {
3906 if (TARGET_THUMB)
3907 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3908 NULL, 0, OPTAB_LIB_WIDEN);
3909 return addr;
3910 }
3911 \f
3912 /* Return 1 if it is possible to return using a single instruction.
3913 If SIBLING is non-null, this is a test for a return before a sibling
3914 call. SIBLING is the call insn, so we can examine its register usage. */
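/* For example (illustrative), a leaf function that saves no registers
   and needs no stack adjustment can return with a single "bx lr" (or
   "mov pc, lr" on pre-v4T cores), whereas a function that must pop
   several registers and then interwork cannot.  */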
3915
3916 int
3917 use_return_insn (int iscond, rtx sibling)
3918 {
3919 int regno;
3920 unsigned int func_type;
3921 unsigned long saved_int_regs;
3922 unsigned HOST_WIDE_INT stack_adjust;
3923 arm_stack_offsets *offsets;
3924
3925 /* Never use a return instruction before reload has run. */
3926 if (!reload_completed)
3927 return 0;
3928
3929 func_type = arm_current_func_type ();
3930
3931 /* Naked, volatile and stack alignment functions need special
3932 consideration. */
3933 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3934 return 0;
3935
3936 /* So do interrupt functions that use the frame pointer and Thumb
3937 interrupt functions. */
3938 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3939 return 0;
3940
3941 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3942 && !optimize_function_for_size_p (cfun))
3943 return 0;
3944
3945 offsets = arm_get_frame_offsets ();
3946 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3947
3948 /* As do variadic functions. */
3949 if (crtl->args.pretend_args_size
3950 || cfun->machine->uses_anonymous_args
3951 /* Or if the function calls __builtin_eh_return () */
3952 || crtl->calls_eh_return
3953 /* Or if the function calls alloca */
3954 || cfun->calls_alloca
3955 /* Or if there is a stack adjustment. However, if the stack pointer
3956 is saved on the stack, we can use a pre-incrementing stack load. */
3957 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3958 && stack_adjust == 4))
3959 /* Or if the static chain register was saved above the frame, under the
3960 assumption that the stack pointer isn't saved on the stack. */
3961 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3962 && arm_compute_static_chain_stack_bytes() != 0))
3963 return 0;
3964
3965 saved_int_regs = offsets->saved_regs_mask;
3966
3967 /* Unfortunately, the insn
3968
3969 ldmib sp, {..., sp, ...}
3970
3971 triggers a bug on most SA-110 based devices, such that the stack
3972 pointer won't be correctly restored if the instruction takes a
3973 page fault. We work around this problem by popping r3 along with
3974 the other registers, since that is never slower than executing
3975 another instruction.
3976
3977 We test for !arm_arch5 here, because code for any architecture
3978 less than this could potentially be run on one of the buggy
3979 chips. */
3980 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3981 {
3982 /* Validate that r3 is a call-clobbered register (always true in
3983 the default abi) ... */
3984 if (!call_used_regs[3])
3985 return 0;
3986
3987 /* ... that it isn't being used for a return value ... */
3988 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3989 return 0;
3990
3991 /* ... or for a tail-call argument ... */
3992 if (sibling)
3993 {
3994 gcc_assert (CALL_P (sibling));
3995
3996 if (find_regno_fusage (sibling, USE, 3))
3997 return 0;
3998 }
3999
4000 /* ... and that there are no call-saved registers in r0-r2
4001 (always true in the default ABI). */
4002 if (saved_int_regs & 0x7)
4003 return 0;
4004 }
4005
4006 /* Can't be done if interworking with Thumb, and any registers have been
4007 stacked. */
4008 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4009 return 0;
4010
4011 /* On StrongARM, conditional returns are expensive if they aren't
4012 taken and multiple registers have been stacked. */
4013 if (iscond && arm_tune_strongarm)
4014 {
4015 /* Conditional return when just the LR is stored is a simple
4016 conditional-load instruction, that's not expensive. */
4017 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4018 return 0;
4019
4020 if (flag_pic
4021 && arm_pic_register != INVALID_REGNUM
4022 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4023 return 0;
4024 }
4025
4026 /* ARMv8-M non-secure entry functions need to use bxns to return and thus
4027 need several instructions if anything needs to be popped. */
4028 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4029 return 0;
4030
4031 /* If there are saved registers but the LR isn't saved, then we need
4032 two instructions for the return. */
4033 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4034 return 0;
4035
4036 /* Can't be done if any of the VFP regs are pushed,
4037 since this also requires an insn. */
4038 if (TARGET_HARD_FLOAT)
4039 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4040 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4041 return 0;
4042
4043 if (TARGET_REALLY_IWMMXT)
4044 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4045 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4046 return 0;
4047
4048 return 1;
4049 }
4050
4051 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4052 shrink-wrapping if possible. This is the case if we need to emit a
4053 prologue, which we can test by looking at the offsets. */
4054 bool
4055 use_simple_return_p (void)
4056 {
4057 arm_stack_offsets *offsets;
4058
4059 offsets = arm_get_frame_offsets ();
4060 return offsets->outgoing_args != 0;
4061 }
4062
4063 /* Return TRUE if int I is a valid immediate ARM constant. */
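/* For example, in ARM state an immediate is an 8-bit value rotated right
   by an even number of bits, so 0x000000ff, 0xff000000 and 0x0003fc00
   are all valid, whereas 0x00000101 is not and must be synthesized from
   more than one instruction.  */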
4064
4065 int
4066 const_ok_for_arm (HOST_WIDE_INT i)
4067 {
4068 int lowbit;
4069
4070 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4071 be all zero, or all one. */
4072 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4073 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4074 != ((~(unsigned HOST_WIDE_INT) 0)
4075 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4076 return FALSE;
4077
4078 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4079
4080 /* Fast return for 0 and small values. We must do this for zero, since
4081 the code below can't handle that one case. */
4082 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4083 return TRUE;
4084
4085 /* Get the number of trailing zeros. */
4086 lowbit = ffs((int) i) - 1;
4087
4088 /* Only even shifts are allowed in ARM mode so round down to the
4089 nearest even number. */
4090 if (TARGET_ARM)
4091 lowbit &= ~1;
4092
4093 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4094 return TRUE;
4095
4096 if (TARGET_ARM)
4097 {
4098 /* Allow rotated constants in ARM mode. */
4099 if (lowbit <= 4
4100 && ((i & ~0xc000003f) == 0
4101 || (i & ~0xf000000f) == 0
4102 || (i & ~0xfc000003) == 0))
4103 return TRUE;
4104 }
4105 else
4106 {
4107 HOST_WIDE_INT v;
4108
4109 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4110 v = i & 0xff;
4111 v |= v << 16;
4112 if (i == v || i == (v | (v << 8)))
4113 return TRUE;
4114
4115 /* Allow repeated pattern 0xXY00XY00. */
4116 v = i & 0xff00;
4117 v |= v << 16;
4118 if (i == v)
4119 return TRUE;
4120 }
4121
4122 return FALSE;
4123 }
4124
4125 /* Return true if I is a valid constant for the operation CODE. */
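/* For example, const_ok_for_op (-1, PLUS) is true even though -1 is not
   a valid immediate, because the addition can instead be emitted as a
   subtraction of 1.  */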
4126 int
4127 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4128 {
4129 if (const_ok_for_arm (i))
4130 return 1;
4131
4132 switch (code)
4133 {
4134 case SET:
4135 /* See if we can use movw. */
4136 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4137 return 1;
4138 else
4139 /* Otherwise, try mvn. */
4140 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4141
4142 case PLUS:
4143 /* See if we can use addw or subw. */
4144 if (TARGET_THUMB2
4145 && ((i & 0xfffff000) == 0
4146 || ((-i) & 0xfffff000) == 0))
4147 return 1;
4148 /* Fall through. */
4149 case COMPARE:
4150 case EQ:
4151 case NE:
4152 case GT:
4153 case LE:
4154 case LT:
4155 case GE:
4156 case GEU:
4157 case LTU:
4158 case GTU:
4159 case LEU:
4160 case UNORDERED:
4161 case ORDERED:
4162 case UNEQ:
4163 case UNGE:
4164 case UNLT:
4165 case UNGT:
4166 case UNLE:
4167 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4168
4169 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4170 case XOR:
4171 return 0;
4172
4173 case IOR:
4174 if (TARGET_THUMB2)
4175 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4176 return 0;
4177
4178 case AND:
4179 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4180
4181 default:
4182 gcc_unreachable ();
4183 }
4184 }
4185
4186 /* Return true if I is a valid di mode constant for the operation CODE. */
4187 int
4188 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4189 {
4190 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4191 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4192 rtx hi = GEN_INT (hi_val);
4193 rtx lo = GEN_INT (lo_val);
4194
4195 if (TARGET_THUMB1)
4196 return 0;
4197
4198 switch (code)
4199 {
4200 case AND:
4201 case IOR:
4202 case XOR:
4203 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4204 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4205 case PLUS:
4206 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4207
4208 default:
4209 return 0;
4210 }
4211 }
4212
4213 /* Emit a sequence of insns to handle a large constant.
4214 CODE is the code of the operation required, it can be any of SET, PLUS,
4215 IOR, AND, XOR, MINUS;
4216 MODE is the mode in which the operation is being performed;
4217 VAL is the integer to operate on;
4218 SOURCE is the other operand (a register, or a null-pointer for SET);
4219 SUBTARGETS means it is safe to create scratch registers if that will
4220 either produce a simpler sequence, or we will want to cse the values.
4221 Return value is the number of insns emitted. */
4222
4223 /* ??? Tweak this for thumb2. */
4224 int
4225 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4226 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4227 {
4228 rtx cond;
4229
4230 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4231 cond = COND_EXEC_TEST (PATTERN (insn));
4232 else
4233 cond = NULL_RTX;
4234
4235 if (subtargets || code == SET
4236 || (REG_P (target) && REG_P (source)
4237 && REGNO (target) != REGNO (source)))
4238 {
4239 /* After arm_reorg has been called, we can't fix up expensive
4240 constants by pushing them into memory so we must synthesize
4241 them in-line, regardless of the cost. This is only likely to
4242 be more costly on chips that have load delay slots and we are
4243 compiling without running the scheduler (so no splitting
4244 occurred before the final instruction emission).
4245
4246 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4247 */
4248 if (!cfun->machine->after_arm_reorg
4249 && !cond
4250 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4251 1, 0)
4252 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4253 + (code != SET))))
4254 {
4255 if (code == SET)
4256 {
4257 /* Currently SET is the only monadic value for CODE, all
4258 the rest are dyadic. */
4259 if (TARGET_USE_MOVT)
4260 arm_emit_movpair (target, GEN_INT (val));
4261 else
4262 emit_set_insn (target, GEN_INT (val));
4263
4264 return 1;
4265 }
4266 else
4267 {
4268 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4269
4270 if (TARGET_USE_MOVT)
4271 arm_emit_movpair (temp, GEN_INT (val));
4272 else
4273 emit_set_insn (temp, GEN_INT (val));
4274
4275 /* For MINUS, the constant is the minuend: we compute VAL - SOURCE,
4276 since we never have subtraction of a constant. */
4277 if (code == MINUS)
4278 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4279 else
4280 emit_set_insn (target,
4281 gen_rtx_fmt_ee (code, mode, source, temp));
4282 return 2;
4283 }
4284 }
4285 }
4286
4287 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4288 1);
4289 }
4290
4291 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4292 ARM/THUMB2 immediates, and add up to VAL.
4293 The function return value gives the number of insns required. */
4294 static int
4295 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4296 struct four_ints *return_sequence)
4297 {
4298 int best_consecutive_zeros = 0;
4299 int i;
4300 int best_start = 0;
4301 int insns1, insns2;
4302 struct four_ints tmp_sequence;
4303
4304 /* If we aren't targeting ARM, the best place to start is always at
4305 the bottom, otherwise look more closely. */
4306 if (TARGET_ARM)
4307 {
4308 for (i = 0; i < 32; i += 2)
4309 {
4310 int consecutive_zeros = 0;
4311
4312 if (!(val & (3 << i)))
4313 {
4314 while ((i < 32) && !(val & (3 << i)))
4315 {
4316 consecutive_zeros += 2;
4317 i += 2;
4318 }
4319 if (consecutive_zeros > best_consecutive_zeros)
4320 {
4321 best_consecutive_zeros = consecutive_zeros;
4322 best_start = i - consecutive_zeros;
4323 }
4324 i -= 2;
4325 }
4326 }
4327 }
4328
4329 /* So long as it won't require any more insns to do so, it's
4330 desirable to emit a small constant (in bits 0...9) in the last
4331 insn. This way there is more chance that it can be combined with
4332 a later addressing insn to form a pre-indexed load or store
4333 operation. Consider:
4334
4335 *((volatile int *)0xe0000100) = 1;
4336 *((volatile int *)0xe0000110) = 2;
4337
4338 We want this to wind up as:
4339
4340 mov rA, #0xe0000000
4341 mov rB, #1
4342 str rB, [rA, #0x100]
4343 mov rB, #2
4344 str rB, [rA, #0x110]
4345
4346 rather than having to synthesize both large constants from scratch.
4347
4348 Therefore, we calculate how many insns would be required to emit
4349 the constant starting from `best_start', and also starting from
4350 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4351 yield a shorter sequence, we may as well use zero. */
4352 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4353 if (best_start != 0
4354 && ((HOST_WIDE_INT_1U << best_start) < val))
4355 {
4356 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4357 if (insns2 <= insns1)
4358 {
4359 *return_sequence = tmp_sequence;
4360 insns1 = insns2;
4361 }
4362 }
4363
4364 return insns1;
4365 }
4366
4367 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4368 static int
4369 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4370 struct four_ints *return_sequence, int i)
4371 {
4372 int remainder = val & 0xffffffff;
4373 int insns = 0;
4374
4375 /* Try and find a way of doing the job in either two or three
4376 instructions.
4377
4378 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4379 location. We start at position I. This may be the MSB, or
4380 optimal_immediate_sequence may have positioned it at the largest block
4381 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4382 wrapping around to the top of the word when we drop off the bottom.
4383 In the worst case this code should produce no more than four insns.
4384
4385 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4386 constants, shifted to any arbitrary location. We should always start
4387 at the MSB. */
4388 do
4389 {
4390 int end;
4391 unsigned int b1, b2, b3, b4;
4392 unsigned HOST_WIDE_INT result;
4393 int loc;
4394
4395 gcc_assert (insns < 4);
4396
4397 if (i <= 0)
4398 i += 32;
4399
4400 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4401 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4402 {
4403 loc = i;
4404 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4405 /* We can use addw/subw for the last 12 bits. */
4406 result = remainder;
4407 else
4408 {
4409 /* Use an 8-bit shifted/rotated immediate. */
4410 end = i - 8;
4411 if (end < 0)
4412 end += 32;
4413 result = remainder & ((0x0ff << end)
4414 | ((i < end) ? (0xff >> (32 - end))
4415 : 0));
4416 i -= 8;
4417 }
4418 }
4419 else
4420 {
4421 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4422 arbitrary shifts. */
4423 i -= TARGET_ARM ? 2 : 1;
4424 continue;
4425 }
4426
4427 /* Next, see if we can do a better job with a thumb2 replicated
4428 constant.
4429
4430 We do it this way around to catch the cases like 0x01F001E0 where
4431 two 8-bit immediates would work, but a replicated constant would
4432 make it worse.
4433
4434 TODO: 16-bit constants that don't clear all the bits, but still win.
4435 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4436 if (TARGET_THUMB2)
4437 {
4438 b1 = (remainder & 0xff000000) >> 24;
4439 b2 = (remainder & 0x00ff0000) >> 16;
4440 b3 = (remainder & 0x0000ff00) >> 8;
4441 b4 = remainder & 0xff;
4442
4443 if (loc > 24)
4444 {
4445 /* The 8-bit immediate already found clears b1 (and maybe b2),
4446 but must leave b3 and b4 alone. */
4447
4448 /* First try to find a 32-bit replicated constant that clears
4449 almost everything. We can assume that we can't do it in one,
4450 or else we wouldn't be here. */
4451 unsigned int tmp = b1 & b2 & b3 & b4;
4452 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4453 + (tmp << 24);
4454 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4455 + (tmp == b3) + (tmp == b4);
4456 if (tmp
4457 && (matching_bytes >= 3
4458 || (matching_bytes == 2
4459 && const_ok_for_op (remainder & ~tmp2, code))))
4460 {
4461 /* At least 3 of the bytes match, and the fourth has at
4462 least as many bits set, or two of the bytes match
4463 and it will only require one more insn to finish. */
4464 result = tmp2;
4465 i = tmp != b1 ? 32
4466 : tmp != b2 ? 24
4467 : tmp != b3 ? 16
4468 : 8;
4469 }
4470
4471 /* Second, try to find a 16-bit replicated constant that can
4472 leave three of the bytes clear. If b2 or b4 is already
4473 zero, then we can. If the 8-bit from above would not
4474 clear b2 anyway, then we still win. */
4475 else if (b1 == b3 && (!b2 || !b4
4476 || (remainder & 0x00ff0000 & ~result)))
4477 {
4478 result = remainder & 0xff00ff00;
4479 i = 24;
4480 }
4481 }
4482 else if (loc > 16)
4483 {
4484 /* The 8-bit immediate already found clears b2 (and maybe b3)
4485 and we don't get here unless b1 is already clear, but it will
4486 leave b4 unchanged. */
4487
4488 /* If we can clear b2 and b4 at once, then we win, since the
4489 8-bits couldn't possibly reach that far. */
4490 if (b2 == b4)
4491 {
4492 result = remainder & 0x00ff00ff;
4493 i = 16;
4494 }
4495 }
4496 }
4497
4498 return_sequence->i[insns++] = result;
4499 remainder &= ~result;
4500
4501 if (code == SET || code == MINUS)
4502 code = PLUS;
4503 }
4504 while (remainder);
4505
4506 return insns;
4507 }
4508
4509 /* Emit an instruction with the indicated PATTERN. If COND is
4510 non-NULL, conditionalize the execution of the instruction on COND
4511 being true. */
4512
4513 static void
4514 emit_constant_insn (rtx cond, rtx pattern)
4515 {
4516 if (cond)
4517 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4518 emit_insn (pattern);
4519 }
4520
4521 /* As above, but extra parameter GENERATE which, if clear, suppresses
4522 RTL generation. */
4523
4524 static int
4525 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4526 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4527 int subtargets, int generate)
4528 {
4529 int can_invert = 0;
4530 int can_negate = 0;
4531 int final_invert = 0;
4532 int i;
4533 int set_sign_bit_copies = 0;
4534 int clear_sign_bit_copies = 0;
4535 int clear_zero_bit_copies = 0;
4536 int set_zero_bit_copies = 0;
4537 int insns = 0, neg_insns, inv_insns;
4538 unsigned HOST_WIDE_INT temp1, temp2;
4539 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4540 struct four_ints *immediates;
4541 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4542
4543 /* Find out which operations are safe for a given CODE. Also do a quick
4544 check for degenerate cases; these can occur when DImode operations
4545 are split. */
4546 switch (code)
4547 {
4548 case SET:
4549 can_invert = 1;
4550 break;
4551
4552 case PLUS:
4553 can_negate = 1;
4554 break;
4555
4556 case IOR:
4557 if (remainder == 0xffffffff)
4558 {
4559 if (generate)
4560 emit_constant_insn (cond,
4561 gen_rtx_SET (target,
4562 GEN_INT (ARM_SIGN_EXTEND (val))));
4563 return 1;
4564 }
4565
4566 if (remainder == 0)
4567 {
4568 if (reload_completed && rtx_equal_p (target, source))
4569 return 0;
4570
4571 if (generate)
4572 emit_constant_insn (cond, gen_rtx_SET (target, source));
4573 return 1;
4574 }
4575 break;
4576
4577 case AND:
4578 if (remainder == 0)
4579 {
4580 if (generate)
4581 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4582 return 1;
4583 }
4584 if (remainder == 0xffffffff)
4585 {
4586 if (reload_completed && rtx_equal_p (target, source))
4587 return 0;
4588 if (generate)
4589 emit_constant_insn (cond, gen_rtx_SET (target, source));
4590 return 1;
4591 }
4592 can_invert = 1;
4593 break;
4594
4595 case XOR:
4596 if (remainder == 0)
4597 {
4598 if (reload_completed && rtx_equal_p (target, source))
4599 return 0;
4600 if (generate)
4601 emit_constant_insn (cond, gen_rtx_SET (target, source));
4602 return 1;
4603 }
4604
4605 if (remainder == 0xffffffff)
4606 {
4607 if (generate)
4608 emit_constant_insn (cond,
4609 gen_rtx_SET (target,
4610 gen_rtx_NOT (mode, source)));
4611 return 1;
4612 }
4613 final_invert = 1;
4614 break;
4615
4616 case MINUS:
4617 /* We treat MINUS as (val - source), since (source - val) is always
4618 passed as (source + (-val)). */
4619 if (remainder == 0)
4620 {
4621 if (generate)
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (target,
4624 gen_rtx_NEG (mode, source)));
4625 return 1;
4626 }
4627 if (const_ok_for_arm (val))
4628 {
4629 if (generate)
4630 emit_constant_insn (cond,
4631 gen_rtx_SET (target,
4632 gen_rtx_MINUS (mode, GEN_INT (val),
4633 source)));
4634 return 1;
4635 }
4636
4637 break;
4638
4639 default:
4640 gcc_unreachable ();
4641 }
4642
4643 /* If we can do it in one insn get out quickly. */
4644 if (const_ok_for_op (val, code))
4645 {
4646 if (generate)
4647 emit_constant_insn (cond,
4648 gen_rtx_SET (target,
4649 (source
4650 ? gen_rtx_fmt_ee (code, mode, source,
4651 GEN_INT (val))
4652 : GEN_INT (val))));
4653 return 1;
4654 }
4655
4656 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4657 insn. */
4658 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4659 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4660 {
4661 if (generate)
4662 {
4663 if (mode == SImode && i == 16)
4664 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4665 smaller insn. */
4666 emit_constant_insn (cond,
4667 gen_zero_extendhisi2
4668 (target, gen_lowpart (HImode, source)));
4669 else
4670 /* Extz only supports SImode, but we can coerce the operands
4671 into that mode. */
4672 emit_constant_insn (cond,
4673 gen_extzv_t2 (gen_lowpart (SImode, target),
4674 gen_lowpart (SImode, source),
4675 GEN_INT (i), const0_rtx));
4676 }
4677
4678 return 1;
4679 }
4680
4681 /* Calculate a few attributes that may be useful for specific
4682 optimizations. */
4683 /* Count number of leading zeros. */
4684 for (i = 31; i >= 0; i--)
4685 {
4686 if ((remainder & (1 << i)) == 0)
4687 clear_sign_bit_copies++;
4688 else
4689 break;
4690 }
4691
4692 /* Count number of leading 1's. */
4693 for (i = 31; i >= 0; i--)
4694 {
4695 if ((remainder & (1 << i)) != 0)
4696 set_sign_bit_copies++;
4697 else
4698 break;
4699 }
4700
4701 /* Count number of trailing zero's. */
4702 for (i = 0; i <= 31; i++)
4703 {
4704 if ((remainder & (1 << i)) == 0)
4705 clear_zero_bit_copies++;
4706 else
4707 break;
4708 }
4709
4710 /* Count number of trailing 1's. */
4711 for (i = 0; i <= 31; i++)
4712 {
4713 if ((remainder & (1 << i)) != 0)
4714 set_zero_bit_copies++;
4715 else
4716 break;
4717 }
4718
4719 switch (code)
4720 {
4721 case SET:
4722 /* See if we can do this by sign_extending a constant that is known
4723 to be negative. This is a good way of doing it, since the shift
4724 may well merge into a subsequent insn. */
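/* For example (illustrative), 0xfffffe00 (-512) is not a valid
   immediate, but it can be built in two insns as:
       mov     rT, #0x80000000
       mov     rD, rT, asr #22  */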
4725 if (set_sign_bit_copies > 1)
4726 {
4727 if (const_ok_for_arm
4728 (temp1 = ARM_SIGN_EXTEND (remainder
4729 << (set_sign_bit_copies - 1))))
4730 {
4731 if (generate)
4732 {
4733 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4734 emit_constant_insn (cond,
4735 gen_rtx_SET (new_src, GEN_INT (temp1)));
4736 emit_constant_insn (cond,
4737 gen_ashrsi3 (target, new_src,
4738 GEN_INT (set_sign_bit_copies - 1)));
4739 }
4740 return 2;
4741 }
4742 /* For an inverted constant, we will need to set the low bits,
4743 these will be shifted out of harm's way. */
4744 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4745 if (const_ok_for_arm (~temp1))
4746 {
4747 if (generate)
4748 {
4749 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4750 emit_constant_insn (cond,
4751 gen_rtx_SET (new_src, GEN_INT (temp1)));
4752 emit_constant_insn (cond,
4753 gen_ashrsi3 (target, new_src,
4754 GEN_INT (set_sign_bit_copies - 1)));
4755 }
4756 return 2;
4757 }
4758 }
4759
4760 /* See if we can calculate the value as the difference between two
4761 valid immediates. */
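/* For example (illustrative), 0x000fffff is not a valid immediate and
   neither is its inverse, but it can be built as 0x00100000 - 1:
       mov     rT, #0x00100000
       sub     rD, rT, #1  */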
4762 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4763 {
4764 int topshift = clear_sign_bit_copies & ~1;
4765
4766 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4767 & (0xff000000 >> topshift));
4768
4769 /* If temp1 is zero, then that means the 9 most significant
4770 bits of remainder were 1 and we've caused it to overflow.
4771 When topshift is 0 we don't need to do anything since we
4772 can borrow from 'bit 32'. */
4773 if (temp1 == 0 && topshift != 0)
4774 temp1 = 0x80000000 >> (topshift - 1);
4775
4776 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4777
4778 if (const_ok_for_arm (temp2))
4779 {
4780 if (generate)
4781 {
4782 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4783 emit_constant_insn (cond,
4784 gen_rtx_SET (new_src, GEN_INT (temp1)));
4785 emit_constant_insn (cond,
4786 gen_addsi3 (target, new_src,
4787 GEN_INT (-temp2)));
4788 }
4789
4790 return 2;
4791 }
4792 }
4793
4794 /* See if we can generate this by setting the bottom (or the top)
4795 16 bits, and then shifting these into the other half of the
4796 word. We only look for the simplest cases, to do more would cost
4797 too much. Be careful, however, not to generate this when the
4798 alternative would take fewer insns. */
4799 if (val & 0xffff0000)
4800 {
4801 temp1 = remainder & 0xffff0000;
4802 temp2 = remainder & 0x0000ffff;
4803
4804 /* Overlaps outside this range are best done using other methods. */
4805 for (i = 9; i < 24; i++)
4806 {
4807 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4808 && !const_ok_for_arm (temp2))
4809 {
4810 rtx new_src = (subtargets
4811 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4812 : target);
4813 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4814 source, subtargets, generate);
4815 source = new_src;
4816 if (generate)
4817 emit_constant_insn
4818 (cond,
4819 gen_rtx_SET
4820 (target,
4821 gen_rtx_IOR (mode,
4822 gen_rtx_ASHIFT (mode, source,
4823 GEN_INT (i)),
4824 source)));
4825 return insns + 1;
4826 }
4827 }
4828
4829 /* Don't duplicate cases already considered. */
4830 for (i = 17; i < 24; i++)
4831 {
4832 if (((temp1 | (temp1 >> i)) == remainder)
4833 && !const_ok_for_arm (temp1))
4834 {
4835 rtx new_src = (subtargets
4836 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4837 : target);
4838 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4839 source, subtargets, generate);
4840 source = new_src;
4841 if (generate)
4842 emit_constant_insn
4843 (cond,
4844 gen_rtx_SET (target,
4845 gen_rtx_IOR
4846 (mode,
4847 gen_rtx_LSHIFTRT (mode, source,
4848 GEN_INT (i)),
4849 source)));
4850 return insns + 1;
4851 }
4852 }
4853 }
4854 break;
4855
4856 case IOR:
4857 case XOR:
4858 /* If we have IOR or XOR, and the constant can be loaded in a
4859 single instruction, and we can find a temporary to put it in,
4860 then this can be done in two instructions instead of 3-4. */
4861 if (subtargets
4862 /* TARGET can't be NULL if SUBTARGETS is 0 */
4863 || (reload_completed && !reg_mentioned_p (target, source)))
4864 {
4865 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4866 {
4867 if (generate)
4868 {
4869 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4870
4871 emit_constant_insn (cond,
4872 gen_rtx_SET (sub, GEN_INT (val)));
4873 emit_constant_insn (cond,
4874 gen_rtx_SET (target,
4875 gen_rtx_fmt_ee (code, mode,
4876 source, sub)));
4877 }
4878 return 2;
4879 }
4880 }
4881
4882 if (code == XOR)
4883 break;
4884
4885 /* Convert
4886 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4887 with the remaining bits 0, e.g. 0xfff00000)
4888 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4889
4890 This can be done in 2 instructions by using shifts with mov or mvn.
4891 e.g. for
4892 x = x | 0xfff00000;
4893 we generate:
4894 mvn r0, r0, asl #12
4895 mvn r0, r0, lsr #12 */
4896 if (set_sign_bit_copies > 8
4897 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4898 {
4899 if (generate)
4900 {
4901 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4902 rtx shift = GEN_INT (set_sign_bit_copies);
4903
4904 emit_constant_insn
4905 (cond,
4906 gen_rtx_SET (sub,
4907 gen_rtx_NOT (mode,
4908 gen_rtx_ASHIFT (mode,
4909 source,
4910 shift))));
4911 emit_constant_insn
4912 (cond,
4913 gen_rtx_SET (target,
4914 gen_rtx_NOT (mode,
4915 gen_rtx_LSHIFTRT (mode, sub,
4916 shift))));
4917 }
4918 return 2;
4919 }
4920
4921 /* Convert
4922 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4923 to
4924 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4925
4926 E.g. for r0 = r0 | 0xfff
4927 mvn r0, r0, lsr #12
4928 mvn r0, r0, asl #12
4929
4930 */
4931 if (set_zero_bit_copies > 8
4932 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4933 {
4934 if (generate)
4935 {
4936 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4937 rtx shift = GEN_INT (set_zero_bit_copies);
4938
4939 emit_constant_insn
4940 (cond,
4941 gen_rtx_SET (sub,
4942 gen_rtx_NOT (mode,
4943 gen_rtx_LSHIFTRT (mode,
4944 source,
4945 shift))));
4946 emit_constant_insn
4947 (cond,
4948 gen_rtx_SET (target,
4949 gen_rtx_NOT (mode,
4950 gen_rtx_ASHIFT (mode, sub,
4951 shift))));
4952 }
4953 return 2;
4954 }
4955
4956 /* This will never be reached for Thumb2 because orn is a valid
4957 instruction. This is for Thumb1 and the ARM 32 bit cases.
4958
4959 x = y | constant (such that ~constant is a valid constant)
4960 Transform this to
4961 x = ~(~y & ~constant).
4962 */
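/* For example (illustrative), when no scratch register is available,
   r0 |= 0xfffffeff becomes:
       mvn     r0, r0
       and     r0, r0, #0x100
       mvn     r0, r0  */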
4963 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4964 {
4965 if (generate)
4966 {
4967 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4968 emit_constant_insn (cond,
4969 gen_rtx_SET (sub,
4970 gen_rtx_NOT (mode, source)));
4971 source = sub;
4972 if (subtargets)
4973 sub = gen_reg_rtx (mode);
4974 emit_constant_insn (cond,
4975 gen_rtx_SET (sub,
4976 gen_rtx_AND (mode, source,
4977 GEN_INT (temp1))));
4978 emit_constant_insn (cond,
4979 gen_rtx_SET (target,
4980 gen_rtx_NOT (mode, sub)));
4981 }
4982 return 3;
4983 }
4984 break;
4985
4986 case AND:
4987 /* See if two shifts will do 2 or more insn's worth of work. */
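/* For example, on a core without uxth, x &= 0xffff becomes:
       mov     rD, rS, asl #16
       mov     rD, rD, lsr #16  */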
4988 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4989 {
4990 HOST_WIDE_INT shift_mask = ((0xffffffff
4991 << (32 - clear_sign_bit_copies))
4992 & 0xffffffff);
4993
4994 if ((remainder | shift_mask) != 0xffffffff)
4995 {
4996 HOST_WIDE_INT new_val
4997 = ARM_SIGN_EXTEND (remainder | shift_mask);
4998
4999 if (generate)
5000 {
5001 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5002 insns = arm_gen_constant (AND, SImode, cond, new_val,
5003 new_src, source, subtargets, 1);
5004 source = new_src;
5005 }
5006 else
5007 {
5008 rtx targ = subtargets ? NULL_RTX : target;
5009 insns = arm_gen_constant (AND, mode, cond, new_val,
5010 targ, source, subtargets, 0);
5011 }
5012 }
5013
5014 if (generate)
5015 {
5016 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5017 rtx shift = GEN_INT (clear_sign_bit_copies);
5018
5019 emit_insn (gen_ashlsi3 (new_src, source, shift));
5020 emit_insn (gen_lshrsi3 (target, new_src, shift));
5021 }
5022
5023 return insns + 2;
5024 }
5025
5026 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5027 {
5028 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5029
5030 if ((remainder | shift_mask) != 0xffffffff)
5031 {
5032 HOST_WIDE_INT new_val
5033 = ARM_SIGN_EXTEND (remainder | shift_mask);
5034 if (generate)
5035 {
5036 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5037
5038 insns = arm_gen_constant (AND, mode, cond, new_val,
5039 new_src, source, subtargets, 1);
5040 source = new_src;
5041 }
5042 else
5043 {
5044 rtx targ = subtargets ? NULL_RTX : target;
5045
5046 insns = arm_gen_constant (AND, mode, cond, new_val,
5047 targ, source, subtargets, 0);
5048 }
5049 }
5050
5051 if (generate)
5052 {
5053 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5054 rtx shift = GEN_INT (clear_zero_bit_copies);
5055
5056 emit_insn (gen_lshrsi3 (new_src, source, shift));
5057 emit_insn (gen_ashlsi3 (target, new_src, shift));
5058 }
5059
5060 return insns + 2;
5061 }
5062
5063 break;
5064
5065 default:
5066 break;
5067 }
5068
5069 /* Calculate what the instruction sequences would be if we generated it
5070 normally, negated, or inverted. */
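/* Illustrative example (assuming a target without movw/movt, destination
   register chosen by the caller): for a SET of 0xfffefdff the positive
   sequence would need four ORR-style chunks, but the bitwise inverse
   0x00010200 splits into just two valid immediates, so the inverted
   sequence is chosen and the loop below emits

	mvn	rd, #0x10000
	sub	rd, rd, #0x200

   i.e. ~0x10000 - 0x200 == 0xfffefdff.  */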
5071 if (code == AND)
5072 /* AND cannot be split into multiple insns, so invert and use BIC. */
5073 insns = 99;
5074 else
5075 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5076
5077 if (can_negate)
5078 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5079 &neg_immediates);
5080 else
5081 neg_insns = 99;
5082
5083 if (can_invert || final_invert)
5084 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5085 &inv_immediates);
5086 else
5087 inv_insns = 99;
5088
5089 immediates = &pos_immediates;
5090
5091 /* Is the negated immediate sequence more efficient? */
5092 if (neg_insns < insns && neg_insns <= inv_insns)
5093 {
5094 insns = neg_insns;
5095 immediates = &neg_immediates;
5096 }
5097 else
5098 can_negate = 0;
5099
5100 /* Is the inverted immediate sequence more efficient?
5101 We must allow for an extra NOT instruction for XOR operations, although
5102 there is some chance that the final 'mvn' will get optimized later. */
5103 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5104 {
5105 insns = inv_insns;
5106 immediates = &inv_immediates;
5107 }
5108 else
5109 {
5110 can_invert = 0;
5111 final_invert = 0;
5112 }
5113
5114 /* Now output the chosen sequence as instructions. */
5115 if (generate)
5116 {
5117 for (i = 0; i < insns; i++)
5118 {
5119 rtx new_src, temp1_rtx;
5120
5121 temp1 = immediates->i[i];
5122
5123 if (code == SET || code == MINUS)
5124 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5125 else if ((final_invert || i < (insns - 1)) && subtargets)
5126 new_src = gen_reg_rtx (mode);
5127 else
5128 new_src = target;
5129
5130 if (can_invert)
5131 temp1 = ~temp1;
5132 else if (can_negate)
5133 temp1 = -temp1;
5134
5135 temp1 = trunc_int_for_mode (temp1, mode);
5136 temp1_rtx = GEN_INT (temp1);
5137
5138 if (code == SET)
5139 ;
5140 else if (code == MINUS)
5141 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5142 else
5143 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5144
5145 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5146 source = new_src;
5147
5148 if (code == SET)
5149 {
5150 can_negate = can_invert;
5151 can_invert = 0;
5152 code = PLUS;
5153 }
5154 else if (code == MINUS)
5155 code = PLUS;
5156 }
5157 }
5158
5159 if (final_invert)
5160 {
5161 if (generate)
5162 emit_constant_insn (cond, gen_rtx_SET (target,
5163 gen_rtx_NOT (mode, source)));
5164 insns++;
5165 }
5166
5167 return insns;
5168 }
5169
5170 /* Canonicalize a comparison so that we are more likely to recognize it.
5171 This can be done for a few constant compares, where we can make the
5172 immediate value easier to load. */
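/* Illustrative example: "x > 0xffff" (GT) is awkward because neither
   0xffff nor -0xffff is a valid ARM immediate, but 0x10000 is; so the
   comparison is rewritten as "x >= 0x10000" (GE) and a single
   "cmp rN, #65536" suffices.  */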
5173
5174 static void
5175 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5176 bool op0_preserve_value)
5177 {
5178 machine_mode mode;
5179 unsigned HOST_WIDE_INT i, maxval;
5180
5181 mode = GET_MODE (*op0);
5182 if (mode == VOIDmode)
5183 mode = GET_MODE (*op1);
5184
5185 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5186
5187 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5188 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5189 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5190 for GTU/LEU in Thumb mode. */
5191 if (mode == DImode)
5192 {
5193
5194 if (*code == GT || *code == LE
5195 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5196 {
5197 /* Missing comparison. First try to use an available
5198 comparison. */
5199 if (CONST_INT_P (*op1))
5200 {
5201 i = INTVAL (*op1);
5202 switch (*code)
5203 {
5204 case GT:
5205 case LE:
5206 if (i != maxval
5207 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5208 {
5209 *op1 = GEN_INT (i + 1);
5210 *code = *code == GT ? GE : LT;
5211 return;
5212 }
5213 break;
5214 case GTU:
5215 case LEU:
5216 if (i != ~((unsigned HOST_WIDE_INT) 0)
5217 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5218 {
5219 *op1 = GEN_INT (i + 1);
5220 *code = *code == GTU ? GEU : LTU;
5221 return;
5222 }
5223 break;
5224 default:
5225 gcc_unreachable ();
5226 }
5227 }
5228
5229 /* If that did not work, reverse the condition. */
5230 if (!op0_preserve_value)
5231 {
5232 std::swap (*op0, *op1);
5233 *code = (int)swap_condition ((enum rtx_code)*code);
5234 }
5235 }
5236 return;
5237 }
5238
5239 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5240 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5241 to facilitate possible combining with a cmp into 'ands'. */
5242 if (mode == SImode
5243 && GET_CODE (*op0) == ZERO_EXTEND
5244 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5245 && GET_MODE (XEXP (*op0, 0)) == QImode
5246 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5247 && subreg_lowpart_p (XEXP (*op0, 0))
5248 && *op1 == const0_rtx)
5249 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5250 GEN_INT (255));
5251
5252 /* Comparisons smaller than DImode. Only adjust comparisons against
5253 an out-of-range constant. */
5254 if (!CONST_INT_P (*op1)
5255 || const_ok_for_arm (INTVAL (*op1))
5256 || const_ok_for_arm (- INTVAL (*op1)))
5257 return;
5258
5259 i = INTVAL (*op1);
5260
5261 switch (*code)
5262 {
5263 case EQ:
5264 case NE:
5265 return;
5266
5267 case GT:
5268 case LE:
5269 if (i != maxval
5270 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5271 {
5272 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5273 *code = *code == GT ? GE : LT;
5274 return;
5275 }
5276 break;
5277
5278 case GE:
5279 case LT:
5280 if (i != ~maxval
5281 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5282 {
5283 *op1 = GEN_INT (i - 1);
5284 *code = *code == GE ? GT : LE;
5285 return;
5286 }
5287 break;
5288
5289 case GTU:
5290 case LEU:
5291 if (i != ~((unsigned HOST_WIDE_INT) 0)
5292 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5293 {
5294 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5295 *code = *code == GTU ? GEU : LTU;
5296 return;
5297 }
5298 break;
5299
5300 case GEU:
5301 case LTU:
5302 if (i != 0
5303 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5304 {
5305 *op1 = GEN_INT (i - 1);
5306 *code = *code == GEU ? GTU : LEU;
5307 return;
5308 }
5309 break;
5310
5311 default:
5312 gcc_unreachable ();
5313 }
5314 }
5315
5316
5317 /* Define how to find the value returned by a function. */
5318
5319 static rtx
5320 arm_function_value(const_tree type, const_tree func,
5321 bool outgoing ATTRIBUTE_UNUSED)
5322 {
5323 machine_mode mode;
5324 int unsignedp ATTRIBUTE_UNUSED;
5325 rtx r ATTRIBUTE_UNUSED;
5326
5327 mode = TYPE_MODE (type);
5328
5329 if (TARGET_AAPCS_BASED)
5330 return aapcs_allocate_return_reg (mode, type, func);
5331
5332 /* Promote integer types. */
5333 if (INTEGRAL_TYPE_P (type))
5334 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5335
5336 /* Promotes small structs returned in a register to full-word size
5337 for big-endian AAPCS. */
5338 if (arm_return_in_msb (type))
5339 {
5340 HOST_WIDE_INT size = int_size_in_bytes (type);
5341 if (size % UNITS_PER_WORD != 0)
5342 {
5343 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5344 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5345 }
5346 }
5347
5348 return arm_libcall_value_1 (mode);
5349 }
5350
5351 /* libcall hashtable helpers. */
5352
5353 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5354 {
5355 static inline hashval_t hash (const rtx_def *);
5356 static inline bool equal (const rtx_def *, const rtx_def *);
5357 static inline void remove (rtx_def *);
5358 };
5359
5360 inline bool
5361 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5362 {
5363 return rtx_equal_p (p1, p2);
5364 }
5365
5366 inline hashval_t
5367 libcall_hasher::hash (const rtx_def *p1)
5368 {
5369 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5370 }
5371
5372 typedef hash_table<libcall_hasher> libcall_table_type;
5373
5374 static void
5375 add_libcall (libcall_table_type *htab, rtx libcall)
5376 {
5377 *htab->find_slot (libcall, INSERT) = libcall;
5378 }
5379
5380 static bool
5381 arm_libcall_uses_aapcs_base (const_rtx libcall)
5382 {
5383 static bool init_done = false;
5384 static libcall_table_type *libcall_htab = NULL;
5385
5386 if (!init_done)
5387 {
5388 init_done = true;
5389
5390 libcall_htab = new libcall_table_type (31);
5391 add_libcall (libcall_htab,
5392 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5393 add_libcall (libcall_htab,
5394 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5395 add_libcall (libcall_htab,
5396 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5397 add_libcall (libcall_htab,
5398 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5399
5400 add_libcall (libcall_htab,
5401 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5402 add_libcall (libcall_htab,
5403 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5404 add_libcall (libcall_htab,
5405 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5406 add_libcall (libcall_htab,
5407 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5408
5409 add_libcall (libcall_htab,
5410 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5411 add_libcall (libcall_htab,
5412 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5413 add_libcall (libcall_htab,
5414 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5415 add_libcall (libcall_htab,
5416 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5417 add_libcall (libcall_htab,
5418 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5425
5426 /* Values from double-precision helper functions are returned in core
5427 registers if the selected core only supports single-precision
5428 arithmetic, even if we are using the hard-float ABI. The same is
5429 true for single-precision helpers, but we will never be using the
5430 hard-float ABI on a CPU which doesn't support single-precision
5431 operations in hardware. */
5432 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5433 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5434 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5435 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5436 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5437 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5438 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5439 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5440 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5441 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5442 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5443 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5444 SFmode));
5445 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5446 DFmode));
5447 add_libcall (libcall_htab,
5448 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5449 }
5450
5451 return libcall && libcall_htab->find (libcall) != NULL;
5452 }
5453
5454 static rtx
5455 arm_libcall_value_1 (machine_mode mode)
5456 {
5457 if (TARGET_AAPCS_BASED)
5458 return aapcs_libcall_value (mode);
5459 else if (TARGET_IWMMXT_ABI
5460 && arm_vector_mode_supported_p (mode))
5461 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5462 else
5463 return gen_rtx_REG (mode, ARG_REGISTER (1));
5464 }
5465
5466 /* Define how to find the value returned by a library function
5467 assuming the value has mode MODE. */
5468
5469 static rtx
5470 arm_libcall_value (machine_mode mode, const_rtx libcall)
5471 {
5472 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5473 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5474 {
5475 /* The following libcalls return their result in integer registers,
5476 even though they return a floating point value. */
5477 if (arm_libcall_uses_aapcs_base (libcall))
5478 return gen_rtx_REG (mode, ARG_REGISTER(1));
5479
5480 }
5481
5482 return arm_libcall_value_1 (mode);
5483 }
5484
5485 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5486
5487 static bool
5488 arm_function_value_regno_p (const unsigned int regno)
5489 {
5490 if (regno == ARG_REGISTER (1)
5491 || (TARGET_32BIT
5492 && TARGET_AAPCS_BASED
5493 && TARGET_HARD_FLOAT
5494 && regno == FIRST_VFP_REGNUM)
5495 || (TARGET_IWMMXT_ABI
5496 && regno == FIRST_IWMMXT_REGNUM))
5497 return true;
5498
5499 return false;
5500 }
5501
5502 /* Determine the amount of memory needed to store the possible return
5503 registers of an untyped call. */
5504 int
5505 arm_apply_result_size (void)
5506 {
5507 int size = 16;
5508
5509 if (TARGET_32BIT)
5510 {
5511 if (TARGET_HARD_FLOAT_ABI)
5512 size += 32;
5513 if (TARGET_IWMMXT_ABI)
5514 size += 8;
5515 }
5516
5517 return size;
5518 }
5519
5520 /* Decide whether TYPE should be returned in memory (true)
5521 or in a register (false). FNTYPE is the type of the function making
5522 the call. */
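/* Some illustrative cases for the AAPCS path below: "struct { int a; }"
   (one word) comes back in r0; "struct { int a, b, c; }" (twelve bytes,
   not a co-processor candidate) is returned in memory via an implicit
   pointer; and with a hard-float PCS a homogeneous aggregate such as
   "struct { float x, y, z, w; }" is picked up by the co-processor check
   and returned in VFP registers instead.  */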
5523 static bool
5524 arm_return_in_memory (const_tree type, const_tree fntype)
5525 {
5526 HOST_WIDE_INT size;
5527
5528 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5529
5530 if (TARGET_AAPCS_BASED)
5531 {
5532 /* Simple, non-aggregate types (i.e. not including vectors and
5533 complex) are always returned in a register (or registers).
5534 We don't care about which register here, so we can short-cut
5535 some of the detail. */
5536 if (!AGGREGATE_TYPE_P (type)
5537 && TREE_CODE (type) != VECTOR_TYPE
5538 && TREE_CODE (type) != COMPLEX_TYPE)
5539 return false;
5540
5541 /* Any return value that is no larger than one word can be
5542 returned in r0. */
5543 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5544 return false;
5545
5546 /* Check any available co-processors to see if they accept the
5547 type as a register candidate (VFP, for example, can return
5548 some aggregates in consecutive registers). These aren't
5549 available if the call is variadic. */
5550 if (aapcs_select_return_coproc (type, fntype) >= 0)
5551 return false;
5552
5553 /* Vector values should be returned using ARM registers, not
5554 memory (unless they're over 16 bytes, which will break since
5555 we only have four call-clobbered registers to play with). */
5556 if (TREE_CODE (type) == VECTOR_TYPE)
5557 return (size < 0 || size > (4 * UNITS_PER_WORD));
5558
5559 /* The rest go in memory. */
5560 return true;
5561 }
5562
5563 if (TREE_CODE (type) == VECTOR_TYPE)
5564 return (size < 0 || size > (4 * UNITS_PER_WORD));
5565
5566 if (!AGGREGATE_TYPE_P (type) &&
5567 (TREE_CODE (type) != VECTOR_TYPE))
5568 /* All simple types are returned in registers. */
5569 return false;
5570
5571 if (arm_abi != ARM_ABI_APCS)
5572 {
5573 /* ATPCS and later return aggregate types in memory only if they are
5574 larger than a word (or are variable size). */
5575 return (size < 0 || size > UNITS_PER_WORD);
5576 }
5577
5578 /* For the arm-wince targets we choose to be compatible with Microsoft's
5579 ARM and Thumb compilers, which always return aggregates in memory. */
5580 #ifndef ARM_WINCE
5581 /* All structures/unions bigger than one word are returned in memory.
5582 Also catch the case where int_size_in_bytes returns -1. In this case
5583 the aggregate is either huge or of variable size, and in either case
5584 we will want to return it via memory and not in a register. */
5585 if (size < 0 || size > UNITS_PER_WORD)
5586 return true;
5587
5588 if (TREE_CODE (type) == RECORD_TYPE)
5589 {
5590 tree field;
5591
5592 /* For a struct the APCS says that we only return in a register
5593 if the type is 'integer like' and every addressable element
5594 has an offset of zero. For practical purposes this means
5595 that the structure can have at most one non bit-field element
5596 and that this element must be the first one in the structure. */
5597
5598 /* Find the first field, ignoring non FIELD_DECL things which will
5599 have been created by C++. */
5600 for (field = TYPE_FIELDS (type);
5601 field && TREE_CODE (field) != FIELD_DECL;
5602 field = DECL_CHAIN (field))
5603 continue;
5604
5605 if (field == NULL)
5606 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5607
5608 /* Check that the first field is valid for returning in a register. */
5609
5610 /* ... Floats are not allowed */
5611 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5612 return true;
5613
5614 /* ... Aggregates that are not themselves valid for returning in
5615 a register are not allowed. */
5616 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5617 return true;
5618
5619 /* Now check the remaining fields, if any. Only bitfields are allowed,
5620 since they are not addressable. */
5621 for (field = DECL_CHAIN (field);
5622 field;
5623 field = DECL_CHAIN (field))
5624 {
5625 if (TREE_CODE (field) != FIELD_DECL)
5626 continue;
5627
5628 if (!DECL_BIT_FIELD_TYPE (field))
5629 return true;
5630 }
5631
5632 return false;
5633 }
5634
5635 if (TREE_CODE (type) == UNION_TYPE)
5636 {
5637 tree field;
5638
5639 /* Unions can be returned in registers if every element is
5640 integral, or can be returned in an integer register. */
5641 for (field = TYPE_FIELDS (type);
5642 field;
5643 field = DECL_CHAIN (field))
5644 {
5645 if (TREE_CODE (field) != FIELD_DECL)
5646 continue;
5647
5648 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5649 return true;
5650
5651 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5652 return true;
5653 }
5654
5655 return false;
5656 }
5657 #endif /* not ARM_WINCE */
5658
5659 /* Return all other types in memory. */
5660 return true;
5661 }
5662
5663 const struct pcs_attribute_arg
5664 {
5665 const char *arg;
5666 enum arm_pcs value;
5667 } pcs_attribute_args[] =
5668 {
5669 {"aapcs", ARM_PCS_AAPCS},
5670 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5671 #if 0
5672 /* We could recognize these, but changes would be needed elsewhere
5673 * to implement them. */
5674 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5675 {"atpcs", ARM_PCS_ATPCS},
5676 {"apcs", ARM_PCS_APCS},
5677 #endif
5678 {NULL, ARM_PCS_UNKNOWN}
5679 };
5680
5681 static enum arm_pcs
5682 arm_pcs_from_attribute (tree attr)
5683 {
5684 const struct pcs_attribute_arg *ptr;
5685 const char *arg;
5686
5687 /* Get the value of the argument. */
5688 if (TREE_VALUE (attr) == NULL_TREE
5689 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5690 return ARM_PCS_UNKNOWN;
5691
5692 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5693
5694 /* Check it against the list of known arguments. */
5695 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5696 if (streq (arg, ptr->arg))
5697 return ptr->value;
5698
5699 /* An unrecognized PCS name. */
5700 return ARM_PCS_UNKNOWN;
5701 }
5702
5703 /* Get the PCS variant to use for this call. TYPE is the function's type
5704 specification, DECL is the specific declaration. DECL may be null if
5705 the call could be indirect or if this is a library call. */
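/* For example (illustrative):

     double f (double) __attribute__ ((pcs ("aapcs")));

   forces the base variant for F, so its argument and result use core
   registers even when the default PCS is "aapcs-vfp".  */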
5706 static enum arm_pcs
5707 arm_get_pcs_model (const_tree type, const_tree decl)
5708 {
5709 bool user_convention = false;
5710 enum arm_pcs user_pcs = arm_pcs_default;
5711 tree attr;
5712
5713 gcc_assert (type);
5714
5715 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5716 if (attr)
5717 {
5718 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5719 user_convention = true;
5720 }
5721
5722 if (TARGET_AAPCS_BASED)
5723 {
5724 /* Detect varargs functions. These always use the base rules
5725 (no argument is ever a candidate for a co-processor
5726 register). */
5727 bool base_rules = stdarg_p (type);
5728
5729 if (user_convention)
5730 {
5731 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5732 sorry ("non-AAPCS derived PCS variant");
5733 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5734 error ("variadic functions must use the base AAPCS variant");
5735 }
5736
5737 if (base_rules)
5738 return ARM_PCS_AAPCS;
5739 else if (user_convention)
5740 return user_pcs;
5741 else if (decl && flag_unit_at_a_time)
5742 {
5743 /* Local functions never leak outside this compilation unit,
5744 so we are free to use whatever conventions are
5745 appropriate. */
5746 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5747 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5748 if (i && i->local)
5749 return ARM_PCS_AAPCS_LOCAL;
5750 }
5751 }
5752 else if (user_convention && user_pcs != arm_pcs_default)
5753 sorry ("PCS variant");
5754
5755 /* For everything else we use the target's default. */
5756 return arm_pcs_default;
5757 }
5758
5759
5760 static void
5761 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5762 const_tree fntype ATTRIBUTE_UNUSED,
5763 rtx libcall ATTRIBUTE_UNUSED,
5764 const_tree fndecl ATTRIBUTE_UNUSED)
5765 {
5766 /* Record the unallocated VFP registers. */
5767 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5768 pcum->aapcs_vfp_reg_alloc = 0;
5769 }
5770
5771 /* Walk down the type tree of TYPE counting consecutive base elements.
5772 If *MODEP is VOIDmode, then set it to the first valid floating point
5773 type. If a non-floating point type is found, or if a floating point
5774 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5775 otherwise return the count in the sub-tree. */
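/* For example, "struct { float x, y, z, w; }" sets *MODEP to SFmode and
   returns 4, making it a candidate for s0-s3, while
   "struct { double d; float f; }" returns -1 because the element modes
   differ.  (Illustrative only; the callers below apply the 1..4 limit.)  */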
5776 static int
5777 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5778 {
5779 machine_mode mode;
5780 HOST_WIDE_INT size;
5781
5782 switch (TREE_CODE (type))
5783 {
5784 case REAL_TYPE:
5785 mode = TYPE_MODE (type);
5786 if (mode != DFmode && mode != SFmode && mode != HFmode)
5787 return -1;
5788
5789 if (*modep == VOIDmode)
5790 *modep = mode;
5791
5792 if (*modep == mode)
5793 return 1;
5794
5795 break;
5796
5797 case COMPLEX_TYPE:
5798 mode = TYPE_MODE (TREE_TYPE (type));
5799 if (mode != DFmode && mode != SFmode)
5800 return -1;
5801
5802 if (*modep == VOIDmode)
5803 *modep = mode;
5804
5805 if (*modep == mode)
5806 return 2;
5807
5808 break;
5809
5810 case VECTOR_TYPE:
5811 /* Use V2SImode and V4SImode as representatives of all 64-bit
5812 and 128-bit vector types, whether or not those modes are
5813 supported with the present options. */
5814 size = int_size_in_bytes (type);
5815 switch (size)
5816 {
5817 case 8:
5818 mode = V2SImode;
5819 break;
5820 case 16:
5821 mode = V4SImode;
5822 break;
5823 default:
5824 return -1;
5825 }
5826
5827 if (*modep == VOIDmode)
5828 *modep = mode;
5829
5830 /* Vector modes are considered to be opaque: two vectors are
5831 equivalent for the purposes of being homogeneous aggregates
5832 if they are the same size. */
5833 if (*modep == mode)
5834 return 1;
5835
5836 break;
5837
5838 case ARRAY_TYPE:
5839 {
5840 int count;
5841 tree index = TYPE_DOMAIN (type);
5842
5843 /* Can't handle incomplete types nor sizes that are not
5844 fixed. */
5845 if (!COMPLETE_TYPE_P (type)
5846 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5847 return -1;
5848
5849 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5850 if (count == -1
5851 || !index
5852 || !TYPE_MAX_VALUE (index)
5853 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5854 || !TYPE_MIN_VALUE (index)
5855 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5856 || count < 0)
5857 return -1;
5858
5859 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5860 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5861
5862 /* There must be no padding. */
5863 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5864 return -1;
5865
5866 return count;
5867 }
5868
5869 case RECORD_TYPE:
5870 {
5871 int count = 0;
5872 int sub_count;
5873 tree field;
5874
5875 /* Can't handle incomplete types nor sizes that are not
5876 fixed. */
5877 if (!COMPLETE_TYPE_P (type)
5878 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5879 return -1;
5880
5881 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5882 {
5883 if (TREE_CODE (field) != FIELD_DECL)
5884 continue;
5885
5886 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5887 if (sub_count < 0)
5888 return -1;
5889 count += sub_count;
5890 }
5891
5892 /* There must be no padding. */
5893 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5894 return -1;
5895
5896 return count;
5897 }
5898
5899 case UNION_TYPE:
5900 case QUAL_UNION_TYPE:
5901 {
5902 /* These aren't very interesting except in a degenerate case. */
5903 int count = 0;
5904 int sub_count;
5905 tree field;
5906
5907 /* Can't handle incomplete types nor sizes that are not
5908 fixed. */
5909 if (!COMPLETE_TYPE_P (type)
5910 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5911 return -1;
5912
5913 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5914 {
5915 if (TREE_CODE (field) != FIELD_DECL)
5916 continue;
5917
5918 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5919 if (sub_count < 0)
5920 return -1;
5921 count = count > sub_count ? count : sub_count;
5922 }
5923
5924 /* There must be no padding. */
5925 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5926 return -1;
5927
5928 return count;
5929 }
5930
5931 default:
5932 break;
5933 }
5934
5935 return -1;
5936 }
5937
5938 /* Return true if PCS_VARIANT should use VFP registers. */
5939 static bool
5940 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5941 {
5942 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5943 {
5944 static bool seen_thumb1_vfp = false;
5945
5946 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5947 {
5948 sorry ("Thumb-1 hard-float VFP ABI");
5949 /* sorry() is not immediately fatal, so only display this once. */
5950 seen_thumb1_vfp = true;
5951 }
5952
5953 return true;
5954 }
5955
5956 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5957 return false;
5958
5959 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5960 (TARGET_VFP_DOUBLE || !is_double));
5961 }
5962
5963 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5964 suitable for passing or returning in VFP registers for the PCS
5965 variant selected. If it is, then *BASE_MODE is updated to contain
5966 a machine mode describing each element of the argument's type and
5967 *COUNT to hold the number of such elements. */
5968 static bool
5969 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5970 machine_mode mode, const_tree type,
5971 machine_mode *base_mode, int *count)
5972 {
5973 machine_mode new_mode = VOIDmode;
5974
5975 /* If we have the type information, prefer that to working things
5976 out from the mode. */
5977 if (type)
5978 {
5979 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5980
5981 if (ag_count > 0 && ag_count <= 4)
5982 *count = ag_count;
5983 else
5984 return false;
5985 }
5986 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5987 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5988 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5989 {
5990 *count = 1;
5991 new_mode = mode;
5992 }
5993 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5994 {
5995 *count = 2;
5996 new_mode = (mode == DCmode ? DFmode : SFmode);
5997 }
5998 else
5999 return false;
6000
6001
6002 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6003 return false;
6004
6005 *base_mode = new_mode;
6006 return true;
6007 }
6008
6009 static bool
6010 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6011 machine_mode mode, const_tree type)
6012 {
6013 int count ATTRIBUTE_UNUSED;
6014 machine_mode ag_mode ATTRIBUTE_UNUSED;
6015
6016 if (!use_vfp_abi (pcs_variant, false))
6017 return false;
6018 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6019 &ag_mode, &count);
6020 }
6021
6022 static bool
6023 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6024 const_tree type)
6025 {
6026 if (!use_vfp_abi (pcum->pcs_variant, false))
6027 return false;
6028
6029 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6030 &pcum->aapcs_vfp_rmode,
6031 &pcum->aapcs_vfp_rcount);
6032 }
6033
6034 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6035 for the behaviour of this function. */
6036
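/* A sketch of the allocation below (illustrative): for a homogeneous
   aggregate of three floats, aapcs_vfp_rmode is SFmode, so shift == 1 and
   mask == 0b111; the first call with all registers free claims s0-s2.
   For DFmode candidates shift == 2, so the search only probes even
   S-register numbers, i.e. D-register boundaries.  */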
6037 static bool
6038 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6039 const_tree type ATTRIBUTE_UNUSED)
6040 {
6041 int rmode_size
6042 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6043 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6044 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6045 int regno;
6046
6047 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6048 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6049 {
6050 pcum->aapcs_vfp_reg_alloc = mask << regno;
6051 if (mode == BLKmode
6052 || (mode == TImode && ! TARGET_NEON)
6053 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6054 {
6055 int i;
6056 int rcount = pcum->aapcs_vfp_rcount;
6057 int rshift = shift;
6058 machine_mode rmode = pcum->aapcs_vfp_rmode;
6059 rtx par;
6060 if (!TARGET_NEON)
6061 {
6062 /* Avoid using unsupported vector modes. */
6063 if (rmode == V2SImode)
6064 rmode = DImode;
6065 else if (rmode == V4SImode)
6066 {
6067 rmode = DImode;
6068 rcount *= 2;
6069 rshift /= 2;
6070 }
6071 }
6072 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6073 for (i = 0; i < rcount; i++)
6074 {
6075 rtx tmp = gen_rtx_REG (rmode,
6076 FIRST_VFP_REGNUM + regno + i * rshift);
6077 tmp = gen_rtx_EXPR_LIST
6078 (VOIDmode, tmp,
6079 GEN_INT (i * GET_MODE_SIZE (rmode)));
6080 XVECEXP (par, 0, i) = tmp;
6081 }
6082
6083 pcum->aapcs_reg = par;
6084 }
6085 else
6086 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6087 return true;
6088 }
6089 return false;
6090 }
6091
6092 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6093 comment there for the behaviour of this function. */
6094
6095 static rtx
6096 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6097 machine_mode mode,
6098 const_tree type ATTRIBUTE_UNUSED)
6099 {
6100 if (!use_vfp_abi (pcs_variant, false))
6101 return NULL;
6102
6103 if (mode == BLKmode
6104 || (GET_MODE_CLASS (mode) == MODE_INT
6105 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6106 && !TARGET_NEON))
6107 {
6108 int count;
6109 machine_mode ag_mode;
6110 int i;
6111 rtx par;
6112 int shift;
6113
6114 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6115 &ag_mode, &count);
6116
6117 if (!TARGET_NEON)
6118 {
6119 if (ag_mode == V2SImode)
6120 ag_mode = DImode;
6121 else if (ag_mode == V4SImode)
6122 {
6123 ag_mode = DImode;
6124 count *= 2;
6125 }
6126 }
6127 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6128 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6129 for (i = 0; i < count; i++)
6130 {
6131 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6132 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6133 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6134 XVECEXP (par, 0, i) = tmp;
6135 }
6136
6137 return par;
6138 }
6139
6140 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6141 }
6142
6143 static void
6144 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6145 machine_mode mode ATTRIBUTE_UNUSED,
6146 const_tree type ATTRIBUTE_UNUSED)
6147 {
6148 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6149 pcum->aapcs_vfp_reg_alloc = 0;
6150 return;
6151 }
6152
6153 #define AAPCS_CP(X) \
6154 { \
6155 aapcs_ ## X ## _cum_init, \
6156 aapcs_ ## X ## _is_call_candidate, \
6157 aapcs_ ## X ## _allocate, \
6158 aapcs_ ## X ## _is_return_candidate, \
6159 aapcs_ ## X ## _allocate_return_reg, \
6160 aapcs_ ## X ## _advance \
6161 }
6162
6163 /* Table of co-processors that can be used to pass arguments in
6164 registers. Ideally no argument should be a candidate for more than
6165 one co-processor table entry, but the table is processed in order
6166 and stops after the first match. If that entry then fails to put
6167 the argument into a co-processor register, the argument will go on
6168 the stack. */
6169 static struct
6170 {
6171 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6172 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6173
6174 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6175 BLKmode) is a candidate for this co-processor's registers; this
6176 function should ignore any position-dependent state in
6177 CUMULATIVE_ARGS and only use call-type dependent information. */
6178 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6179
6180 /* Return true if the argument does get a co-processor register; it
6181 should set aapcs_reg to an RTX of the register allocated as is
6182 required for a return from FUNCTION_ARG. */
6183 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6184
6185 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6186 be returned in this co-processor's registers. */
6187 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6188
6189 /* Allocate and return an RTX element to hold the return type of a call. This
6190 routine must not fail and will only be called if is_return_candidate
6191 returned true with the same parameters. */
6192 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6193
6194 /* Finish processing this argument and prepare to start processing
6195 the next one. */
6196 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6197 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6198 {
6199 AAPCS_CP(vfp)
6200 };
6201
6202 #undef AAPCS_CP
6203
6204 static int
6205 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6206 const_tree type)
6207 {
6208 int i;
6209
6210 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6211 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6212 return i;
6213
6214 return -1;
6215 }
6216
6217 static int
6218 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6219 {
6220 /* We aren't passed a decl, so we can't check that a call is local.
6221 However, it isn't clear that that would be a win anyway, since it
6222 might limit some tail-calling opportunities. */
6223 enum arm_pcs pcs_variant;
6224
6225 if (fntype)
6226 {
6227 const_tree fndecl = NULL_TREE;
6228
6229 if (TREE_CODE (fntype) == FUNCTION_DECL)
6230 {
6231 fndecl = fntype;
6232 fntype = TREE_TYPE (fntype);
6233 }
6234
6235 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6236 }
6237 else
6238 pcs_variant = arm_pcs_default;
6239
6240 if (pcs_variant != ARM_PCS_AAPCS)
6241 {
6242 int i;
6243
6244 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6245 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6246 TYPE_MODE (type),
6247 type))
6248 return i;
6249 }
6250 return -1;
6251 }
6252
6253 static rtx
6254 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6255 const_tree fntype)
6256 {
6257 /* We aren't passed a decl, so we can't check that a call is local.
6258 However, it isn't clear that that would be a win anyway, since it
6259 might limit some tail-calling opportunities. */
6260 enum arm_pcs pcs_variant;
6261 int unsignedp ATTRIBUTE_UNUSED;
6262
6263 if (fntype)
6264 {
6265 const_tree fndecl = NULL_TREE;
6266
6267 if (TREE_CODE (fntype) == FUNCTION_DECL)
6268 {
6269 fndecl = fntype;
6270 fntype = TREE_TYPE (fntype);
6271 }
6272
6273 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6274 }
6275 else
6276 pcs_variant = arm_pcs_default;
6277
6278 /* Promote integer types. */
6279 if (type && INTEGRAL_TYPE_P (type))
6280 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6281
6282 if (pcs_variant != ARM_PCS_AAPCS)
6283 {
6284 int i;
6285
6286 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6287 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6288 type))
6289 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6290 mode, type);
6291 }
6292
6293 /* Promotes small structs returned in a register to full-word size
6294 for big-endian AAPCS. */
6295 if (type && arm_return_in_msb (type))
6296 {
6297 HOST_WIDE_INT size = int_size_in_bytes (type);
6298 if (size % UNITS_PER_WORD != 0)
6299 {
6300 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6301 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6302 }
6303 }
6304
6305 return gen_rtx_REG (mode, R0_REGNUM);
6306 }
6307
6308 static rtx
6309 aapcs_libcall_value (machine_mode mode)
6310 {
6311 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6312 && GET_MODE_SIZE (mode) <= 4)
6313 mode = SImode;
6314
6315 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6316 }
6317
6318 /* Lay out a function argument using the AAPCS rules. The rule
6319 numbers referred to here are those in the AAPCS. */
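/* Worked example of the rules below (illustrative, base AAPCS with no
   co-processor candidates): for "void f (int a, double b, int c)", A takes
   r0 (C4); B needs doubleword alignment, so C3 rounds the NCRN from 1 up
   to 2 and B occupies r2/r3 (C4); C then finds no core registers left and
   goes on the stack (C6, C8).  */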
6320 static void
6321 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6322 const_tree type, bool named)
6323 {
6324 int nregs, nregs2;
6325 int ncrn;
6326
6327 /* We only need to do this once per argument. */
6328 if (pcum->aapcs_arg_processed)
6329 return;
6330
6331 pcum->aapcs_arg_processed = true;
6332
6333 /* Special case: if named is false then we are handling an incoming
6334 anonymous argument which is on the stack. */
6335 if (!named)
6336 return;
6337
6338 /* Is this a potential co-processor register candidate? */
6339 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6340 {
6341 int slot = aapcs_select_call_coproc (pcum, mode, type);
6342 pcum->aapcs_cprc_slot = slot;
6343
6344 /* We don't have to apply any of the rules from part B of the
6345 preparation phase, these are handled elsewhere in the
6346 compiler. */
6347
6348 if (slot >= 0)
6349 {
6350 /* A Co-processor register candidate goes either in its own
6351 class of registers or on the stack. */
6352 if (!pcum->aapcs_cprc_failed[slot])
6353 {
6354 /* C1.cp - Try to allocate the argument to co-processor
6355 registers. */
6356 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6357 return;
6358
6359 /* C2.cp - Put the argument on the stack and note that we
6360 can't assign any more candidates in this slot. We also
6361 need to note that we have allocated stack space, so that
6362 we won't later try to split a non-cprc candidate between
6363 core registers and the stack. */
6364 pcum->aapcs_cprc_failed[slot] = true;
6365 pcum->can_split = false;
6366 }
6367
6368 /* We didn't get a register, so this argument goes on the
6369 stack. */
6370 gcc_assert (pcum->can_split == false);
6371 return;
6372 }
6373 }
6374
6375 /* C3 - For double-word aligned arguments, round the NCRN up to the
6376 next even number. */
6377 ncrn = pcum->aapcs_ncrn;
6378 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6379 ncrn++;
6380
6381 nregs = ARM_NUM_REGS2(mode, type);
6382
6383 /* Sigh, this test should really assert that nregs > 0, but a GCC
6384 extension allows empty structs and then gives them empty size; it
6385 then allows such a structure to be passed by value. For some of
6386 the code below we have to pretend that such an argument has
6387 non-zero size so that we 'locate' it correctly either in
6388 registers or on the stack. */
6389 gcc_assert (nregs >= 0);
6390
6391 nregs2 = nregs ? nregs : 1;
6392
6393 /* C4 - Argument fits entirely in core registers. */
6394 if (ncrn + nregs2 <= NUM_ARG_REGS)
6395 {
6396 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6397 pcum->aapcs_next_ncrn = ncrn + nregs;
6398 return;
6399 }
6400
6401 /* C5 - Some core registers left and there are no arguments already
6402 on the stack: split this argument between the remaining core
6403 registers and the stack. */
6404 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6405 {
6406 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6407 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6408 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6409 return;
6410 }
6411
6412 /* C6 - NCRN is set to 4. */
6413 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6414
6415 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6416 return;
6417 }
6418
6419 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6420 for a call to a function whose data type is FNTYPE.
6421 For a library call, FNTYPE is NULL. */
6422 void
6423 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6424 rtx libname,
6425 tree fndecl ATTRIBUTE_UNUSED)
6426 {
6427 /* Determine the PCS variant to use for this call. */
6428 if (fntype)
6429 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6430 else
6431 pcum->pcs_variant = arm_pcs_default;
6432
6433 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6434 {
6435 if (arm_libcall_uses_aapcs_base (libname))
6436 pcum->pcs_variant = ARM_PCS_AAPCS;
6437
6438 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6439 pcum->aapcs_reg = NULL_RTX;
6440 pcum->aapcs_partial = 0;
6441 pcum->aapcs_arg_processed = false;
6442 pcum->aapcs_cprc_slot = -1;
6443 pcum->can_split = true;
6444
6445 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6446 {
6447 int i;
6448
6449 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6450 {
6451 pcum->aapcs_cprc_failed[i] = false;
6452 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6453 }
6454 }
6455 return;
6456 }
6457
6458 /* Legacy ABIs */
6459
6460 /* On the ARM, the offset starts at 0. */
6461 pcum->nregs = 0;
6462 pcum->iwmmxt_nregs = 0;
6463 pcum->can_split = true;
6464
6465 /* Varargs vectors are treated the same as long long.
6466 named_count avoids having to change the way arm handles 'named'. */
6467 pcum->named_count = 0;
6468 pcum->nargs = 0;
6469
6470 if (TARGET_REALLY_IWMMXT && fntype)
6471 {
6472 tree fn_arg;
6473
6474 for (fn_arg = TYPE_ARG_TYPES (fntype);
6475 fn_arg;
6476 fn_arg = TREE_CHAIN (fn_arg))
6477 pcum->named_count += 1;
6478
6479 if (! pcum->named_count)
6480 pcum->named_count = INT_MAX;
6481 }
6482 }
6483
6484 /* Return true if mode/type need doubleword alignment. */
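/* For example (illustrative): "long long", "double" and any struct with a
   64-bit aligned member answer true here, which makes the callers start
   such arguments in an even-numbered core register pair or at an 8-byte
   aligned stack slot on ARM_DOUBLEWORD_ALIGN targets.  */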
6485 static bool
6486 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6487 {
6488 if (!type)
6489 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6490
6491 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6492 if (!AGGREGATE_TYPE_P (type))
6493 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6494
6495 /* Array types: Use member alignment of element type. */
6496 if (TREE_CODE (type) == ARRAY_TYPE)
6497 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6498
6499 /* Record/aggregate types: Use greatest member alignment of any member. */
6500 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6501 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6502 return true;
6503
6504 return false;
6505 }
6506
6507
6508 /* Determine where to put an argument to a function.
6509 Value is zero to push the argument on the stack,
6510 or a hard register in which to store the argument.
6511
6512 MODE is the argument's machine mode.
6513 TYPE is the data type of the argument (as a tree).
6514 This is null for libcalls where that information may
6515 not be available.
6516 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6517 the preceding args and about the function being called.
6518 NAMED is nonzero if this argument is a named parameter
6519 (otherwise it is an extra parameter matching an ellipsis).
6520
6521 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6522 other arguments are passed on the stack. If (NAMED == 0) (which happens
6523 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6524 defined), say it is passed in the stack (function_prologue will
6525 indeed make it pass in the stack if necessary). */
6526
6527 static rtx
6528 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6529 const_tree type, bool named)
6530 {
6531 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6532 int nregs;
6533
6534 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6535 a call insn (op3 of a call_value insn). */
6536 if (mode == VOIDmode)
6537 return const0_rtx;
6538
6539 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6540 {
6541 aapcs_layout_arg (pcum, mode, type, named);
6542 return pcum->aapcs_reg;
6543 }
6544
6545 /* Varargs vectors are treated the same as long long.
6546 named_count avoids having to change the way arm handles 'named'. */
6547 if (TARGET_IWMMXT_ABI
6548 && arm_vector_mode_supported_p (mode)
6549 && pcum->named_count > pcum->nargs + 1)
6550 {
6551 if (pcum->iwmmxt_nregs <= 9)
6552 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6553 else
6554 {
6555 pcum->can_split = false;
6556 return NULL_RTX;
6557 }
6558 }
6559
6560 /* Put doubleword aligned quantities in even register pairs. */
6561 if (pcum->nregs & 1
6562 && ARM_DOUBLEWORD_ALIGN
6563 && arm_needs_doubleword_align (mode, type))
6564 pcum->nregs++;
6565
6566 /* Only allow splitting an arg between regs and memory if all preceding
6567 args were allocated to regs. For args passed by reference we only count
6568 the reference pointer. */
6569 if (pcum->can_split)
6570 nregs = 1;
6571 else
6572 nregs = ARM_NUM_REGS2 (mode, type);
6573
6574 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6575 return NULL_RTX;
6576
6577 return gen_rtx_REG (mode, pcum->nregs);
6578 }
6579
6580 static unsigned int
6581 arm_function_arg_boundary (machine_mode mode, const_tree type)
6582 {
6583 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6584 ? DOUBLEWORD_ALIGNMENT
6585 : PARM_BOUNDARY);
6586 }
6587
6588 static int
6589 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6590 tree type, bool named)
6591 {
6592 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6593 int nregs = pcum->nregs;
6594
6595 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6596 {
6597 aapcs_layout_arg (pcum, mode, type, named);
6598 return pcum->aapcs_partial;
6599 }
6600
6601 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6602 return 0;
6603
6604 if (NUM_ARG_REGS > nregs
6605 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6606 && pcum->can_split)
6607 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6608
6609 return 0;
6610 }
6611
6612 /* Update the data in PCUM to advance over an argument
6613 of mode MODE and data type TYPE.
6614 (TYPE is null for libcalls where that information may not be available.) */
6615
6616 static void
6617 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6618 const_tree type, bool named)
6619 {
6620 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6621
6622 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6623 {
6624 aapcs_layout_arg (pcum, mode, type, named);
6625
6626 if (pcum->aapcs_cprc_slot >= 0)
6627 {
6628 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6629 type);
6630 pcum->aapcs_cprc_slot = -1;
6631 }
6632
6633 /* Generic stuff. */
6634 pcum->aapcs_arg_processed = false;
6635 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6636 pcum->aapcs_reg = NULL_RTX;
6637 pcum->aapcs_partial = 0;
6638 }
6639 else
6640 {
6641 pcum->nargs += 1;
6642 if (arm_vector_mode_supported_p (mode)
6643 && pcum->named_count > pcum->nargs
6644 && TARGET_IWMMXT_ABI)
6645 pcum->iwmmxt_nregs += 1;
6646 else
6647 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6648 }
6649 }
6650
6651 /* Variable sized types are passed by reference. This is a GCC
6652 extension to the ARM ABI. */
6653
6654 static bool
6655 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6656 machine_mode mode ATTRIBUTE_UNUSED,
6657 const_tree type, bool named ATTRIBUTE_UNUSED)
6658 {
6659 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6660 }
6661 \f
6662 /* Encode the current state of the #pragma [no_]long_calls. */
6663 typedef enum
6664 {
6665 OFF, /* No #pragma [no_]long_calls is in effect. */
6666 LONG, /* #pragma long_calls is in effect. */
6667 SHORT /* #pragma no_long_calls is in effect. */
6668 } arm_pragma_enum;
6669
6670 static arm_pragma_enum arm_pragma_long_calls = OFF;
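/* Typical usage of these pragmas (illustrative):

     #pragma long_calls
     void far_away (void);	-- declared with the long_call attribute
     #pragma no_long_calls
     void nearby (void);	-- declared with short_call
     #pragma long_calls_off	-- restore the command-line default.  */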
6671
6672 void
6673 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6674 {
6675 arm_pragma_long_calls = LONG;
6676 }
6677
6678 void
6679 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6680 {
6681 arm_pragma_long_calls = SHORT;
6682 }
6683
6684 void
6685 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6686 {
6687 arm_pragma_long_calls = OFF;
6688 }
6689 \f
6690 /* Handle an attribute requiring a FUNCTION_DECL;
6691 arguments as in struct attribute_spec.handler. */
6692 static tree
6693 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6694 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6695 {
6696 if (TREE_CODE (*node) != FUNCTION_DECL)
6697 {
6698 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6699 name);
6700 *no_add_attrs = true;
6701 }
6702
6703 return NULL_TREE;
6704 }
6705
6706 /* Handle an "interrupt" or "isr" attribute;
6707 arguments as in struct attribute_spec.handler. */
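/* For example (illustrative):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   The recognized argument strings are those accepted by arm_isr_value
   ("IRQ", "FIQ", "SWI", "ABORT", "UNDEF" and their lower-case forms);
   anything else yields ARM_FT_UNKNOWN and the attribute is ignored with a
   warning.  */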
6708 static tree
6709 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6710 bool *no_add_attrs)
6711 {
6712 if (DECL_P (*node))
6713 {
6714 if (TREE_CODE (*node) != FUNCTION_DECL)
6715 {
6716 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6717 name);
6718 *no_add_attrs = true;
6719 }
6720 /* FIXME: the argument if any is checked for type attributes;
6721 should it be checked for decl ones? */
6722 }
6723 else
6724 {
6725 if (TREE_CODE (*node) == FUNCTION_TYPE
6726 || TREE_CODE (*node) == METHOD_TYPE)
6727 {
6728 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6729 {
6730 warning (OPT_Wattributes, "%qE attribute ignored",
6731 name);
6732 *no_add_attrs = true;
6733 }
6734 }
6735 else if (TREE_CODE (*node) == POINTER_TYPE
6736 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6737 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6738 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6739 {
6740 *node = build_variant_type_copy (*node);
6741 TREE_TYPE (*node) = build_type_attribute_variant
6742 (TREE_TYPE (*node),
6743 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6744 *no_add_attrs = true;
6745 }
6746 else
6747 {
6748 /* Possibly pass this attribute on from the type to a decl. */
6749 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6750 | (int) ATTR_FLAG_FUNCTION_NEXT
6751 | (int) ATTR_FLAG_ARRAY_NEXT))
6752 {
6753 *no_add_attrs = true;
6754 return tree_cons (name, args, NULL_TREE);
6755 }
6756 else
6757 {
6758 warning (OPT_Wattributes, "%qE attribute ignored",
6759 name);
6760 }
6761 }
6762 }
6763
6764 return NULL_TREE;
6765 }
6766
6767 /* Handle a "pcs" attribute; arguments as in struct
6768 attribute_spec.handler. */
6769 static tree
6770 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6771 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6772 {
6773 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6774 {
6775 warning (OPT_Wattributes, "%qE attribute ignored", name);
6776 *no_add_attrs = true;
6777 }
6778 return NULL_TREE;
6779 }
6780
6781 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6782 /* Handle the "notshared" attribute. This attribute is another way of
6783 requesting hidden visibility. ARM's compiler supports
6784 "__declspec(notshared)"; we support the same thing via an
6785 attribute. */
6786
6787 static tree
6788 arm_handle_notshared_attribute (tree *node,
6789 tree name ATTRIBUTE_UNUSED,
6790 tree args ATTRIBUTE_UNUSED,
6791 int flags ATTRIBUTE_UNUSED,
6792 bool *no_add_attrs)
6793 {
6794 tree decl = TYPE_NAME (*node);
6795
6796 if (decl)
6797 {
6798 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6799 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6800 *no_add_attrs = false;
6801 }
6802 return NULL_TREE;
6803 }
6804 #endif
6805
6806 /* This function returns true if a function with declaration FNDECL and type
6807 FNTYPE uses the stack to pass arguments or return variables and false
6808 otherwise. This is used for functions with the attributes
6809 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6810 diagnostic messages if the stack is used. NAME is the name of the attribute
6811 used. */
6812
6813 static bool
6814 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6815 {
6816 function_args_iterator args_iter;
6817 CUMULATIVE_ARGS args_so_far_v;
6818 cumulative_args_t args_so_far;
6819 bool first_param = true;
6820 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6821
6822 /* Error out if any argument is passed on the stack. */
6823 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6824 args_so_far = pack_cumulative_args (&args_so_far_v);
6825 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6826 {
6827 rtx arg_rtx;
6828 machine_mode arg_mode = TYPE_MODE (arg_type);
6829
6830 prev_arg_type = arg_type;
6831 if (VOID_TYPE_P (arg_type))
6832 continue;
6833
6834 if (!first_param)
6835 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6836 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6837 if (!arg_rtx
6838 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6839 {
6840 error ("%qE attribute not available to functions with arguments "
6841 "passed on the stack", name);
6842 return true;
6843 }
6844 first_param = false;
6845 }
6846
6847 /* Error out for variadic functions since we cannot control how many
6848 arguments will be passed and thus the stack could be used. stdarg_p () is not
6849 used for the checking to avoid browsing arguments twice. */
6850 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6851 {
6852 error ("%qE attribute not available to functions with variable number "
6853 "of arguments", name);
6854 return true;
6855 }
6856
6857 /* Error out if return value is passed on the stack. */
6858 ret_type = TREE_TYPE (fntype);
6859 if (arm_return_in_memory (ret_type, fntype))
6860 {
6861 error ("%qE attribute not available to functions that return value on "
6862 "the stack", name);
6863 return true;
6864 }
6865 return false;
6866 }
6867
6868 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6869 function will check whether the attribute is allowed here and will add the
6870 attribute to the function declaration tree or otherwise issue a warning. */
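/* For example (illustrative), when compiling with -mcmse:

     int __attribute__ ((cmse_nonsecure_entry)) secure_service (int arg);

   The declaration must have external linkage and must pass all arguments
   and its return value in registers; the checks below enforce this.  */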
6871
6872 static tree
6873 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6874 tree /* args */,
6875 int /* flags */,
6876 bool *no_add_attrs)
6877 {
6878 tree fndecl;
6879
6880 if (!use_cmse)
6881 {
6882 *no_add_attrs = true;
6883 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6884 name);
6885 return NULL_TREE;
6886 }
6887
6888 /* Ignore attribute for function types. */
6889 if (TREE_CODE (*node) != FUNCTION_DECL)
6890 {
6891 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6892 name);
6893 *no_add_attrs = true;
6894 return NULL_TREE;
6895 }
6896
6897 fndecl = *node;
6898
6899 /* Warn for static linkage functions. */
6900 if (!TREE_PUBLIC (fndecl))
6901 {
6902 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6903 "with static linkage", name);
6904 *no_add_attrs = true;
6905 return NULL_TREE;
6906 }
6907
6908 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6909 TREE_TYPE (fndecl));
6910 return NULL_TREE;
6911 }
6912
6913
6914 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6915 function will check whether the attribute is allowed here and will add the
6916 attribute to the function type tree or otherwise issue a diagnostic. The
6917 reason we check this at declaration time is to only allow the use of the
6918 attribute with declarations of function pointers and not function
6919 declarations. This function checks NODE is of the expected type and issues
6920 diagnostics otherwise using NAME. If it is not of the expected type
6921 *NO_ADD_ATTRS will be set to true. */
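/* For example (illustrative), when compiling with -mcmse:

     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));

   Here the attribute ends up on the function type the pointer refers to;
   applying it to an ordinary function declaration is diagnosed below.  */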
6922
6923 static tree
6924 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6925 tree /* args */,
6926 int /* flags */,
6927 bool *no_add_attrs)
6928 {
6929 tree decl = NULL_TREE, fntype = NULL_TREE;
6930 tree type;
6931
6932 if (!use_cmse)
6933 {
6934 *no_add_attrs = true;
6935 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6936 name);
6937 return NULL_TREE;
6938 }
6939
6940 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6941 {
6942 decl = *node;
6943 fntype = TREE_TYPE (decl);
6944 }
6945
6946 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6947 fntype = TREE_TYPE (fntype);
6948
6949 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6950 {
6951 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6952 "function pointer", name);
6953 *no_add_attrs = true;
6954 return NULL_TREE;
6955 }
6956
6957 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6958
6959 if (*no_add_attrs)
6960 return NULL_TREE;
6961
6962 /* Prevent trees from being shared among function types with and without
6963 the cmse_nonsecure_call attribute. */
6964 type = TREE_TYPE (decl);
6965
6966 type = build_distinct_type_copy (type);
6967 TREE_TYPE (decl) = type;
6968 fntype = type;
6969
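/* Walk down any chain of pointer types, copying each level and re-linking
   it, so that the FUNCTION_TYPE reached at the bottom (which receives the
   attribute below) is not shared with other declarations.  */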
6970 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6971 {
6972 type = fntype;
6973 fntype = TREE_TYPE (fntype);
6974 fntype = build_distinct_type_copy (fntype);
6975 TREE_TYPE (type) = fntype;
6976 }
6977
6978 /* Construct a type attribute and add it to the function type. */
6979 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6980 TYPE_ATTRIBUTES (fntype));
6981 TYPE_ATTRIBUTES (fntype) = attrs;
6982 return NULL_TREE;
6983 }
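/* A usage sketch for the attribute handled above (illustrative only;
   ns_cb_t and ns_cb are not part of this file): with -mcmse, a non-secure
   function pointer can be declared as

     typedef void (*ns_cb_t) (void) __attribute__ ((cmse_nonsecure_call));
     ns_cb_t ns_cb;

   and calls made through ns_cb then use the non-secure call sequence.  */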
6984
6985 /* Return 0 if the attributes for two types are incompatible, 1 if they
6986 are compatible, and 2 if they are nearly compatible (which causes a
6987 warning to be generated). */
6988 static int
6989 arm_comp_type_attributes (const_tree type1, const_tree type2)
6990 {
6991 int l1, l2, s1, s2;
6992
6993 /* Check for mismatch of non-default calling convention. */
6994 if (TREE_CODE (type1) != FUNCTION_TYPE)
6995 return 1;
6996
6997 /* Check for mismatched call attributes. */
6998 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6999 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7000 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7001 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7002
7003 /* Only bother to check if an attribute is defined. */
7004 if (l1 | l2 | s1 | s2)
7005 {
7006 /* If one type has an attribute, the other must have the same attribute. */
7007 if ((l1 != l2) || (s1 != s2))
7008 return 0;
7009
7010 /* Disallow mixed attributes. */
7011 if ((l1 & s2) || (l2 & s1))
7012 return 0;
7013 }
7014
7015 /* Check for mismatched ISR attribute. */
7016 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7017 if (! l1)
7018 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7019 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7020 if (! l2)
7021 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7022 if (l1 != l2)
7023 return 0;
7024
7025 l1 = lookup_attribute ("cmse_nonsecure_call",
7026 TYPE_ATTRIBUTES (type1)) != NULL;
7027 l2 = lookup_attribute ("cmse_nonsecure_call",
7028 TYPE_ATTRIBUTES (type2)) != NULL;
7029
7030 if (l1 != l2)
7031 return 0;
7032
7033 return 1;
7034 }
7035
7036 /* Assign default attributes to a newly defined type. This is used to
7037 set the short_call/long_call attributes for function types of
7038 functions defined inside the corresponding #pragma scopes. */
7039 static void
7040 arm_set_default_type_attributes (tree type)
7041 {
7042 /* Add __attribute__ ((long_call)) to all functions when inside
7043 #pragma long_calls, or __attribute__ ((short_call)) when inside
7044 #pragma no_long_calls. */
7045 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7046 {
7047 tree type_attr_list, attr_name;
7048 type_attr_list = TYPE_ATTRIBUTES (type);
7049
7050 if (arm_pragma_long_calls == LONG)
7051 attr_name = get_identifier ("long_call");
7052 else if (arm_pragma_long_calls == SHORT)
7053 attr_name = get_identifier ("short_call");
7054 else
7055 return;
7056
7057 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7058 TYPE_ATTRIBUTES (type) = type_attr_list;
7059 }
7060 }
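/* For example (an illustrative sketch; far_helper is not part of this
   file), in a translation unit containing

     #pragma long_calls
     void far_helper (void);
     #pragma long_calls_off

   this hook gives far_helper's function type the long_call attribute,
   while #pragma no_long_calls would give it short_call instead.  */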
7061 \f
7062 /* Return true if DECL is known to be linked into section SECTION. */
7063
7064 static bool
7065 arm_function_in_section_p (tree decl, section *section)
7066 {
7067 /* We can only be certain about the prevailing symbol definition. */
7068 if (!decl_binds_to_current_def_p (decl))
7069 return false;
7070
7071 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7072 if (!DECL_SECTION_NAME (decl))
7073 {
7074 /* Make sure that we will not create a unique section for DECL. */
7075 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7076 return false;
7077 }
7078
7079 return function_section (decl) == section;
7080 }
7081
7082 /* Return nonzero if a 32-bit "long_call" should be generated for
7083 a call from the current function to DECL. We generate a long_call
7084 if the function:
7085
7086 a. has an __attribute__ ((long_call))
7087 or b. is within the scope of a #pragma long_calls
7088 or c. the -mlong-calls command line switch has been specified
7089
7090 However we do not generate a long call if the function:
7091
7092 d. has an __attribute__ ((short_call))
7093 or e. is inside the scope of a #pragma no_long_calls
7094 or f. is defined in the same section as the current function. */
7095
7096 bool
7097 arm_is_long_call_p (tree decl)
7098 {
7099 tree attrs;
7100
7101 if (!decl)
7102 return TARGET_LONG_CALLS;
7103
7104 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7105 if (lookup_attribute ("short_call", attrs))
7106 return false;
7107
7108 /* For "f", be conservative, and only cater for cases in which the
7109 whole of the current function is placed in the same section. */
7110 if (!flag_reorder_blocks_and_partition
7111 && TREE_CODE (decl) == FUNCTION_DECL
7112 && arm_function_in_section_p (decl, current_function_section ()))
7113 return false;
7114
7115 if (lookup_attribute ("long_call", attrs))
7116 return true;
7117
7118 return TARGET_LONG_CALLS;
7119 }
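/* For instance (illustrative; remote_func is not part of this file), a
   declaration such as

     extern void remote_func (void) __attribute__ ((long_call));

   forces a 32-bit call sequence for calls to remote_func, as does building
   with -mlong-calls, unless one of the exceptions above (short_call,
   #pragma no_long_calls, or a same-section definition) applies.  */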
7120
7121 /* Return nonzero if it is ok to make a tail-call to DECL. */
7122 static bool
7123 arm_function_ok_for_sibcall (tree decl, tree exp)
7124 {
7125 unsigned long func_type;
7126
7127 if (cfun->machine->sibcall_blocked)
7128 return false;
7129
7130 /* Never tailcall something if we are generating code for Thumb-1. */
7131 if (TARGET_THUMB1)
7132 return false;
7133
7134 /* The PIC register is live on entry to VxWorks PLT entries, so we
7135 must make the call before restoring the PIC register. */
7136 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7137 return false;
7138
7139 /* If we are interworking and the function is not declared static
7140 then we can't tail-call it unless we know that it exists in this
7141 compilation unit (since it might be a Thumb routine). */
7142 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7143 && !TREE_ASM_WRITTEN (decl))
7144 return false;
7145
7146 func_type = arm_current_func_type ();
7147 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7148 if (IS_INTERRUPT (func_type))
7149 return false;
7150
7151 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7152 generated for entry functions themselves. */
7153 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7154 return false;
7155
7156 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls;
7157 this would complicate matters for later code generation. */
7158 if (TREE_CODE (exp) == CALL_EXPR)
7159 {
7160 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7161 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7162 return false;
7163 }
7164
7165 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7166 {
7167 /* Check that the return value locations are the same. For
7168 example that we aren't returning a value from the sibling in
7169 a VFP register but then need to transfer it to a core
7170 register. */
7171 rtx a, b;
7172 tree decl_or_type = decl;
7173
7174 /* If it is an indirect function pointer, get the function type. */
7175 if (!decl)
7176 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7177
7178 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7179 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7180 cfun->decl, false);
7181 if (!rtx_equal_p (a, b))
7182 return false;
7183 }
7184
7185 /* Never tailcall if function may be called with a misaligned SP. */
7186 if (IS_STACKALIGN (func_type))
7187 return false;
7188
7189 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7190 references should become a NOP. Don't convert such calls into
7191 sibling calls. */
7192 if (TARGET_AAPCS_BASED
7193 && arm_abi == ARM_ABI_AAPCS
7194 && decl
7195 && DECL_WEAK (decl))
7196 return false;
7197
7198 /* Everything else is ok. */
7199 return true;
7200 }
7201
7202 \f
7203 /* Addressing mode support functions. */
7204
7205 /* Return nonzero if X is a legitimate immediate operand when compiling
7206 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7207 int
7208 legitimate_pic_operand_p (rtx x)
7209 {
7210 if (GET_CODE (x) == SYMBOL_REF
7211 || (GET_CODE (x) == CONST
7212 && GET_CODE (XEXP (x, 0)) == PLUS
7213 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7214 return 0;
7215
7216 return 1;
7217 }
7218
7219 /* Record that the current function needs a PIC register. Initialize
7220 cfun->machine->pic_reg if we have not already done so. */
7221
7222 static void
7223 require_pic_register (void)
7224 {
7225 /* A lot of the logic here is made obscure by the fact that this
7226 routine gets called as part of the rtx cost estimation process.
7227 We don't want those calls to affect any assumptions about the real
7228 function; and further, we can't call entry_of_function() until we
7229 start the real expansion process. */
7230 if (!crtl->uses_pic_offset_table)
7231 {
7232 gcc_assert (can_create_pseudo_p ());
7233 if (arm_pic_register != INVALID_REGNUM
7234 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7235 {
7236 if (!cfun->machine->pic_reg)
7237 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7238
7239 /* Play games to avoid marking the function as needing pic
7240 if we are being called as part of the cost-estimation
7241 process. */
7242 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7243 crtl->uses_pic_offset_table = 1;
7244 }
7245 else
7246 {
7247 rtx_insn *seq, *insn;
7248
7249 if (!cfun->machine->pic_reg)
7250 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7251
7252 /* Play games to avoid marking the function as needing pic
7253 if we are being called as part of the cost-estimation
7254 process. */
7255 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7256 {
7257 crtl->uses_pic_offset_table = 1;
7258 start_sequence ();
7259
7260 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7261 && arm_pic_register > LAST_LO_REGNUM)
7262 emit_move_insn (cfun->machine->pic_reg,
7263 gen_rtx_REG (Pmode, arm_pic_register));
7264 else
7265 arm_load_pic_register (0UL);
7266
7267 seq = get_insns ();
7268 end_sequence ();
7269
7270 for (insn = seq; insn; insn = NEXT_INSN (insn))
7271 if (INSN_P (insn))
7272 INSN_LOCATION (insn) = prologue_location;
7273
7274 /* We can be called during expansion of PHI nodes, where
7275 we can't yet emit instructions directly in the final
7276 insn stream. Queue the insns on the entry edge, they will
7277 be committed after everything else is expanded. */
7278 insert_insn_on_edge (seq,
7279 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7280 }
7281 }
7282 }
7283 }
7284
7285 rtx
7286 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7287 {
7288 if (GET_CODE (orig) == SYMBOL_REF
7289 || GET_CODE (orig) == LABEL_REF)
7290 {
7291 if (reg == 0)
7292 {
7293 gcc_assert (can_create_pseudo_p ());
7294 reg = gen_reg_rtx (Pmode);
7295 }
7296
7297 /* VxWorks does not impose a fixed gap between segments; the run-time
7298 gap can be different from the object-file gap. We therefore can't
7299 use GOTOFF unless we are absolutely sure that the symbol is in the
7300 same segment as the GOT. Unfortunately, the flexibility of linker
7301 scripts means that we can't be sure of that in general, so assume
7302 that GOTOFF is never valid on VxWorks. */
7303 rtx_insn *insn;
7304 if ((GET_CODE (orig) == LABEL_REF
7305 || (GET_CODE (orig) == SYMBOL_REF &&
7306 SYMBOL_REF_LOCAL_P (orig)))
7307 && NEED_GOT_RELOC
7308 && arm_pic_data_is_text_relative)
7309 insn = arm_pic_static_addr (orig, reg);
7310 else
7311 {
7312 rtx pat;
7313 rtx mem;
7314
7315 /* If this function doesn't have a pic register, create one now. */
7316 require_pic_register ();
7317
7318 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7319
7320 /* Make the MEM as close to a constant as possible. */
7321 mem = SET_SRC (pat);
7322 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7323 MEM_READONLY_P (mem) = 1;
7324 MEM_NOTRAP_P (mem) = 1;
7325
7326 insn = emit_insn (pat);
7327 }
7328
7329 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7330 by the loop optimizer. */
7331 set_unique_reg_note (insn, REG_EQUAL, orig);
7332
7333 return reg;
7334 }
7335 else if (GET_CODE (orig) == CONST)
7336 {
7337 rtx base, offset;
7338
7339 if (GET_CODE (XEXP (orig, 0)) == PLUS
7340 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7341 return orig;
7342
7343 /* Handle the case where we have: const (UNSPEC_TLS). */
7344 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7345 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7346 return orig;
7347
7348 /* Handle the case where we have:
7349 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7350 CONST_INT. */
7351 if (GET_CODE (XEXP (orig, 0)) == PLUS
7352 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7353 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7354 {
7355 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7356 return orig;
7357 }
7358
7359 if (reg == 0)
7360 {
7361 gcc_assert (can_create_pseudo_p ());
7362 reg = gen_reg_rtx (Pmode);
7363 }
7364
7365 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7366
7367 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7368 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7369 base == reg ? 0 : reg);
7370
7371 if (CONST_INT_P (offset))
7372 {
7373 /* The base register doesn't really matter; we only want to
7374 test the index for the appropriate mode. */
7375 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7376 {
7377 gcc_assert (can_create_pseudo_p ());
7378 offset = force_reg (Pmode, offset);
7379 }
7380
7381 if (CONST_INT_P (offset))
7382 return plus_constant (Pmode, base, INTVAL (offset));
7383 }
7384
7385 if (GET_MODE_SIZE (mode) > 4
7386 && (GET_MODE_CLASS (mode) == MODE_INT
7387 || TARGET_SOFT_FLOAT))
7388 {
7389 emit_insn (gen_addsi3 (reg, base, offset));
7390 return reg;
7391 }
7392
7393 return gen_rtx_PLUS (Pmode, base, offset);
7394 }
7395
7396 return orig;
7397 }
7398
7399
7400 /* Find a spare register to use during the prologue of a function. */
7401
7402 static int
7403 thumb_find_work_register (unsigned long pushed_regs_mask)
7404 {
7405 int reg;
7406
7407 /* Check the argument registers first as these are call-used. The
7408 register allocation order means that sometimes r3 might be used
7409 but earlier argument registers might not, so check them all. */
7410 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7411 if (!df_regs_ever_live_p (reg))
7412 return reg;
7413
7414 /* Before going on to check the call-saved registers we can try a couple
7415 more ways of deducing that r3 is available. The first is when we are
7416 pushing anonymous arguments onto the stack and we have fewer than 4
7417 registers' worth of fixed arguments(*). In this case r3 will be part of
7418 the variable argument list and so we can be sure that it will be
7419 pushed right at the start of the function. Hence it will be available
7420 for the rest of the prologue.
7421 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7422 if (cfun->machine->uses_anonymous_args
7423 && crtl->args.pretend_args_size > 0)
7424 return LAST_ARG_REGNUM;
7425
7426 /* The other case is when we have fixed arguments but fewer than 4 registers'
7427 worth. In this case r3 might be used in the body of the function, but
7428 it is not being used to convey an argument into the function. In theory
7429 we could just check crtl->args.size to see how many bytes are
7430 being passed in argument registers, but this seems to be unreliable.
7431 Sometimes it will have the value 0 when in fact arguments are being
7432 passed. (See testcase execute/20021111-1.c for an example.) So we also
7433 check the args_info.nregs field. The problem with this field is
7434 that it makes no allowance for arguments that are passed to the
7435 function but which are not used. Hence we could miss an opportunity
7436 when a function has an unused argument in r3. But it is better to be
7437 safe than sorry. */
7438 if (! cfun->machine->uses_anonymous_args
7439 && crtl->args.size >= 0
7440 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7441 && (TARGET_AAPCS_BASED
7442 ? crtl->args.info.aapcs_ncrn < 4
7443 : crtl->args.info.nregs < 4))
7444 return LAST_ARG_REGNUM;
7445
7446 /* Otherwise look for a call-saved register that is going to be pushed. */
7447 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7448 if (pushed_regs_mask & (1 << reg))
7449 return reg;
7450
7451 if (TARGET_THUMB2)
7452 {
7453 /* Thumb-2 can use high regs. */
7454 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7455 if (pushed_regs_mask & (1 << reg))
7456 return reg;
7457 }
7458 /* Something went wrong - thumb_compute_save_reg_mask()
7459 should have arranged for a suitable register to be pushed. */
7460 gcc_unreachable ();
7461 }
7462
7463 static GTY(()) int pic_labelno;
7464
7465 /* Generate code to load the PIC register. In Thumb mode a low register
7466 is used as scratch, found via SAVED_REGS if necessary. */
7467
7468 void
7469 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7470 {
7471 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7472
7473 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7474 return;
7475
7476 gcc_assert (flag_pic);
7477
7478 pic_reg = cfun->machine->pic_reg;
7479 if (TARGET_VXWORKS_RTP)
7480 {
7481 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7482 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7483 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7484
7485 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7486
7487 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7488 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7489 }
7490 else
7491 {
7492 /* We use an UNSPEC rather than a LABEL_REF because this label
7493 never appears in the code stream. */
7494
7495 labelno = GEN_INT (pic_labelno++);
7496 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7497 l1 = gen_rtx_CONST (VOIDmode, l1);
7498
7499 /* On the ARM the PC register contains 'dot + 8' at the time of the
7500 addition; on the Thumb it is 'dot + 4'. */
7501 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7502 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7503 UNSPEC_GOTSYM_OFF);
7504 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7505
7506 if (TARGET_32BIT)
7507 {
7508 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7509 }
7510 else /* TARGET_THUMB1 */
7511 {
7512 if (arm_pic_register != INVALID_REGNUM
7513 && REGNO (pic_reg) > LAST_LO_REGNUM)
7514 {
7515 /* We will have pushed the pic register, so we should always be
7516 able to find a work register. */
7517 pic_tmp = gen_rtx_REG (SImode,
7518 thumb_find_work_register (saved_regs));
7519 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7520 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7521 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7522 }
7523 else if (arm_pic_register != INVALID_REGNUM
7524 && arm_pic_register > LAST_LO_REGNUM
7525 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7526 {
7527 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7528 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7529 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7530 }
7531 else
7532 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7533 }
7534 }
7535
7536 /* Need to emit this whether or not we obey regdecls,
7537 since setjmp/longjmp can cause life info to screw up. */
7538 emit_use (pic_reg);
7539 }
7540
7541 /* Generate code to load the address of a static var when flag_pic is set. */
7542 static rtx_insn *
7543 arm_pic_static_addr (rtx orig, rtx reg)
7544 {
7545 rtx l1, labelno, offset_rtx;
7546
7547 gcc_assert (flag_pic);
7548
7549 /* We use an UNSPEC rather than a LABEL_REF because this label
7550 never appears in the code stream. */
7551 labelno = GEN_INT (pic_labelno++);
7552 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7553 l1 = gen_rtx_CONST (VOIDmode, l1);
7554
7555 /* On the ARM the PC register contains 'dot + 8' at the time of the
7556 addition; on the Thumb it is 'dot + 4'. */
7557 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7558 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7559 UNSPEC_SYMBOL_OFFSET);
7560 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7561
7562 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7563 }
7564
7565 /* Return nonzero if X is valid as an ARM state addressing register. */
7566 static int
7567 arm_address_register_rtx_p (rtx x, int strict_p)
7568 {
7569 int regno;
7570
7571 if (!REG_P (x))
7572 return 0;
7573
7574 regno = REGNO (x);
7575
7576 if (strict_p)
7577 return ARM_REGNO_OK_FOR_BASE_P (regno);
7578
7579 return (regno <= LAST_ARM_REGNUM
7580 || regno >= FIRST_PSEUDO_REGISTER
7581 || regno == FRAME_POINTER_REGNUM
7582 || regno == ARG_POINTER_REGNUM);
7583 }
7584
7585 /* Return TRUE if this rtx is the difference of a symbol and a label,
7586 and will reduce to a PC-relative relocation in the object file.
7587 Expressions like this can be left alone when generating PIC, rather
7588 than forced through the GOT. */
7589 static int
7590 pcrel_constant_p (rtx x)
7591 {
7592 if (GET_CODE (x) == MINUS)
7593 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7594
7595 return FALSE;
7596 }
7597
7598 /* Return true if X will surely end up in an index register after the next
7599 splitting pass. */
7600 static bool
7601 will_be_in_index_register (const_rtx x)
7602 {
7603 /* arm.md: calculate_pic_address will split this into a register. */
7604 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7605 }
7606
7607 /* Return nonzero if X is a valid ARM state address operand. */
7608 int
7609 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7610 int strict_p)
7611 {
7612 bool use_ldrd;
7613 enum rtx_code code = GET_CODE (x);
7614
7615 if (arm_address_register_rtx_p (x, strict_p))
7616 return 1;
7617
7618 use_ldrd = (TARGET_LDRD
7619 && (mode == DImode || mode == DFmode));
7620
7621 if (code == POST_INC || code == PRE_DEC
7622 || ((code == PRE_INC || code == POST_DEC)
7623 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7624 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7625
7626 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7627 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7628 && GET_CODE (XEXP (x, 1)) == PLUS
7629 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7630 {
7631 rtx addend = XEXP (XEXP (x, 1), 1);
7632
7633 /* Don't allow ldrd post increment by register because it's hard
7634 to fixup invalid register choices. */
7635 if (use_ldrd
7636 && GET_CODE (x) == POST_MODIFY
7637 && REG_P (addend))
7638 return 0;
7639
7640 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7641 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7642 }
7643
7644 /* After reload constants split into minipools will have addresses
7645 from a LABEL_REF. */
7646 else if (reload_completed
7647 && (code == LABEL_REF
7648 || (code == CONST
7649 && GET_CODE (XEXP (x, 0)) == PLUS
7650 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7651 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7652 return 1;
7653
7654 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7655 return 0;
7656
7657 else if (code == PLUS)
7658 {
7659 rtx xop0 = XEXP (x, 0);
7660 rtx xop1 = XEXP (x, 1);
7661
7662 return ((arm_address_register_rtx_p (xop0, strict_p)
7663 && ((CONST_INT_P (xop1)
7664 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7665 || (!strict_p && will_be_in_index_register (xop1))))
7666 || (arm_address_register_rtx_p (xop1, strict_p)
7667 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7668 }
7669
7670 #if 0
7671 /* Reload currently can't handle MINUS, so disable this for now */
7672 else if (GET_CODE (x) == MINUS)
7673 {
7674 rtx xop0 = XEXP (x, 0);
7675 rtx xop1 = XEXP (x, 1);
7676
7677 return (arm_address_register_rtx_p (xop0, strict_p)
7678 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7679 }
7680 #endif
7681
7682 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7683 && code == SYMBOL_REF
7684 && CONSTANT_POOL_ADDRESS_P (x)
7685 && ! (flag_pic
7686 && symbol_mentioned_p (get_pool_constant (x))
7687 && ! pcrel_constant_p (get_pool_constant (x))))
7688 return 1;
7689
7690 return 0;
7691 }
7692
7693 /* Return nonzero if X is a valid Thumb-2 address operand. */
7694 static int
7695 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7696 {
7697 bool use_ldrd;
7698 enum rtx_code code = GET_CODE (x);
7699
7700 if (arm_address_register_rtx_p (x, strict_p))
7701 return 1;
7702
7703 use_ldrd = (TARGET_LDRD
7704 && (mode == DImode || mode == DFmode));
7705
7706 if (code == POST_INC || code == PRE_DEC
7707 || ((code == PRE_INC || code == POST_DEC)
7708 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7709 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7710
7711 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7712 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7713 && GET_CODE (XEXP (x, 1)) == PLUS
7714 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7715 {
7716 /* Thumb-2 only has autoincrement by constant. */
7717 rtx addend = XEXP (XEXP (x, 1), 1);
7718 HOST_WIDE_INT offset;
7719
7720 if (!CONST_INT_P (addend))
7721 return 0;
7722
7723 offset = INTVAL(addend);
7724 if (GET_MODE_SIZE (mode) <= 4)
7725 return (offset > -256 && offset < 256);
7726
7727 return (use_ldrd && offset > -1024 && offset < 1024
7728 && (offset & 3) == 0);
7729 }
7730
7731 /* After reload constants split into minipools will have addresses
7732 from a LABEL_REF. */
7733 else if (reload_completed
7734 && (code == LABEL_REF
7735 || (code == CONST
7736 && GET_CODE (XEXP (x, 0)) == PLUS
7737 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7738 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7739 return 1;
7740
7741 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7742 return 0;
7743
7744 else if (code == PLUS)
7745 {
7746 rtx xop0 = XEXP (x, 0);
7747 rtx xop1 = XEXP (x, 1);
7748
7749 return ((arm_address_register_rtx_p (xop0, strict_p)
7750 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7751 || (!strict_p && will_be_in_index_register (xop1))))
7752 || (arm_address_register_rtx_p (xop1, strict_p)
7753 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7754 }
7755
7756 /* Normally we can assign constant values to target registers without
7757 the help of a constant pool. But there are cases where we have to use
7758 the constant pool, for example:
7759 1) assigning a label to a register;
7760 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7761
7762 A constant pool access of the form:
7763 (set (reg r0) (mem (symbol_ref (".LC0"))))
7764 will cause the use of the literal pool (later, in function arm_reorg).
7765 So here we mark such a form as invalid; the compiler will then
7766 adjust it into:
7767 (set (reg r0) (symbol_ref (".LC0")))
7768 (set (reg r0) (mem (reg r0))).
7769 No extra register is required, and (mem (reg r0)) won't cause the use
7770 of literal pools. */
7771 else if (arm_disable_literal_pool && code == SYMBOL_REF
7772 && CONSTANT_POOL_ADDRESS_P (x))
7773 return 0;
7774
7775 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7776 && code == SYMBOL_REF
7777 && CONSTANT_POOL_ADDRESS_P (x)
7778 && ! (flag_pic
7779 && symbol_mentioned_p (get_pool_constant (x))
7780 && ! pcrel_constant_p (get_pool_constant (x))))
7781 return 1;
7782
7783 return 0;
7784 }
7785
7786 /* Return nonzero if INDEX is valid for an address index operand in
7787 ARM state. */
7788 static int
7789 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7790 int strict_p)
7791 {
7792 HOST_WIDE_INT range;
7793 enum rtx_code code = GET_CODE (index);
7794
7795 /* Standard coprocessor addressing modes. */
7796 if (TARGET_HARD_FLOAT
7797 && (mode == SFmode || mode == DFmode))
7798 return (code == CONST_INT && INTVAL (index) < 1024
7799 && INTVAL (index) > -1024
7800 && (INTVAL (index) & 3) == 0);
7801
7802 /* For quad modes, we restrict the constant offset to be slightly less
7803 than what the instruction format permits. We do this because for
7804 quad mode moves, we will actually decompose them into two separate
7805 double-mode reads or writes. INDEX must therefore be a valid
7806 (double-mode) offset and so should INDEX+8. */
7807 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7808 return (code == CONST_INT
7809 && INTVAL (index) < 1016
7810 && INTVAL (index) > -1024
7811 && (INTVAL (index) & 3) == 0);
7812
7813 /* We have no such constraint on double mode offsets, so we permit the
7814 full range of the instruction format. */
7815 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7816 return (code == CONST_INT
7817 && INTVAL (index) < 1024
7818 && INTVAL (index) > -1024
7819 && (INTVAL (index) & 3) == 0);
7820
7821 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7822 return (code == CONST_INT
7823 && INTVAL (index) < 1024
7824 && INTVAL (index) > -1024
7825 && (INTVAL (index) & 3) == 0);
7826
7827 if (arm_address_register_rtx_p (index, strict_p)
7828 && (GET_MODE_SIZE (mode) <= 4))
7829 return 1;
7830
7831 if (mode == DImode || mode == DFmode)
7832 {
7833 if (code == CONST_INT)
7834 {
7835 HOST_WIDE_INT val = INTVAL (index);
7836
7837 if (TARGET_LDRD)
7838 return val > -256 && val < 256;
7839 else
7840 return val > -4096 && val < 4092;
7841 }
7842
7843 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7844 }
7845
7846 if (GET_MODE_SIZE (mode) <= 4
7847 && ! (arm_arch4
7848 && (mode == HImode
7849 || mode == HFmode
7850 || (mode == QImode && outer == SIGN_EXTEND))))
7851 {
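/* Scaled register offsets, e.g. an address like [r1, r2, lsl #2], are
   accepted here; the ARMv4 halfword and sign-extended byte loads excluded
   above do not support them.  */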
7852 if (code == MULT)
7853 {
7854 rtx xiop0 = XEXP (index, 0);
7855 rtx xiop1 = XEXP (index, 1);
7856
7857 return ((arm_address_register_rtx_p (xiop0, strict_p)
7858 && power_of_two_operand (xiop1, SImode))
7859 || (arm_address_register_rtx_p (xiop1, strict_p)
7860 && power_of_two_operand (xiop0, SImode)));
7861 }
7862 else if (code == LSHIFTRT || code == ASHIFTRT
7863 || code == ASHIFT || code == ROTATERT)
7864 {
7865 rtx op = XEXP (index, 1);
7866
7867 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7868 && CONST_INT_P (op)
7869 && INTVAL (op) > 0
7870 && INTVAL (op) <= 31);
7871 }
7872 }
7873
7874 /* For ARM v4 we may be doing a sign-extend operation during the
7875 load. */
7876 if (arm_arch4)
7877 {
7878 if (mode == HImode
7879 || mode == HFmode
7880 || (outer == SIGN_EXTEND && mode == QImode))
7881 range = 256;
7882 else
7883 range = 4096;
7884 }
7885 else
7886 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7887
7888 return (code == CONST_INT
7889 && INTVAL (index) < range
7890 && INTVAL (index) > -range);
7891 }
7892
7893 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7894 index operand, i.e. 1, 2, 4 or 8. */
7895 static bool
7896 thumb2_index_mul_operand (rtx op)
7897 {
7898 HOST_WIDE_INT val;
7899
7900 if (!CONST_INT_P (op))
7901 return false;
7902
7903 val = INTVAL(op);
7904 return (val == 1 || val == 2 || val == 4 || val == 8);
7905 }
7906
7907 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7908 static int
7909 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7910 {
7911 enum rtx_code code = GET_CODE (index);
7912
7913 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7914 /* Standard coprocessor addressing modes. */
7915 if (TARGET_HARD_FLOAT
7916 && (mode == SFmode || mode == DFmode))
7917 return (code == CONST_INT && INTVAL (index) < 1024
7918 /* Thumb-2 allows only a > -256 index range for its core register
7919 load/stores. Since we allow SF/DF in core registers, we have
7920 to use the intersection between -256~4096 (core) and -1024~1024
7921 (coprocessor). */
7922 && INTVAL (index) > -256
7923 && (INTVAL (index) & 3) == 0);
7924
7925 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7926 {
7927 /* For DImode assume values will usually live in core regs
7928 and only allow LDRD addressing modes. */
7929 if (!TARGET_LDRD || mode != DImode)
7930 return (code == CONST_INT
7931 && INTVAL (index) < 1024
7932 && INTVAL (index) > -1024
7933 && (INTVAL (index) & 3) == 0);
7934 }
7935
7936 /* For quad modes, we restrict the constant offset to be slightly less
7937 than what the instruction format permits. We do this because for
7938 quad mode moves, we will actually decompose them into two separate
7939 double-mode reads or writes. INDEX must therefore be a valid
7940 (double-mode) offset and so should INDEX+8. */
7941 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7942 return (code == CONST_INT
7943 && INTVAL (index) < 1016
7944 && INTVAL (index) > -1024
7945 && (INTVAL (index) & 3) == 0);
7946
7947 /* We have no such constraint on double mode offsets, so we permit the
7948 full range of the instruction format. */
7949 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7950 return (code == CONST_INT
7951 && INTVAL (index) < 1024
7952 && INTVAL (index) > -1024
7953 && (INTVAL (index) & 3) == 0);
7954
7955 if (arm_address_register_rtx_p (index, strict_p)
7956 && (GET_MODE_SIZE (mode) <= 4))
7957 return 1;
7958
7959 if (mode == DImode || mode == DFmode)
7960 {
7961 if (code == CONST_INT)
7962 {
7963 HOST_WIDE_INT val = INTVAL (index);
7964 /* ??? Can we assume ldrd for thumb2? */
7965 /* Thumb-2 ldrd only has reg+const addressing modes. */
7966 /* ldrd supports offsets of +-1020.
7967 However the ldr fallback does not. */
7968 return val > -256 && val < 256 && (val & 3) == 0;
7969 }
7970 else
7971 return 0;
7972 }
7973
7974 if (code == MULT)
7975 {
7976 rtx xiop0 = XEXP (index, 0);
7977 rtx xiop1 = XEXP (index, 1);
7978
7979 return ((arm_address_register_rtx_p (xiop0, strict_p)
7980 && thumb2_index_mul_operand (xiop1))
7981 || (arm_address_register_rtx_p (xiop1, strict_p)
7982 && thumb2_index_mul_operand (xiop0)));
7983 }
7984 else if (code == ASHIFT)
7985 {
7986 rtx op = XEXP (index, 1);
7987
7988 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7989 && CONST_INT_P (op)
7990 && INTVAL (op) > 0
7991 && INTVAL (op) <= 3);
7992 }
7993
7994 return (code == CONST_INT
7995 && INTVAL (index) < 4096
7996 && INTVAL (index) > -256);
7997 }
7998
7999 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8000 static int
8001 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8002 {
8003 int regno;
8004
8005 if (!REG_P (x))
8006 return 0;
8007
8008 regno = REGNO (x);
8009
8010 if (strict_p)
8011 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8012
8013 return (regno <= LAST_LO_REGNUM
8014 || regno > LAST_VIRTUAL_REGISTER
8015 || regno == FRAME_POINTER_REGNUM
8016 || (GET_MODE_SIZE (mode) >= 4
8017 && (regno == STACK_POINTER_REGNUM
8018 || regno >= FIRST_PSEUDO_REGISTER
8019 || x == hard_frame_pointer_rtx
8020 || x == arg_pointer_rtx)));
8021 }
8022
8023 /* Return nonzero if x is a legitimate index register. This is the case
8024 for any base register that can access a QImode object. */
8025 inline static int
8026 thumb1_index_register_rtx_p (rtx x, int strict_p)
8027 {
8028 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8029 }
8030
8031 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8032
8033 The AP may be eliminated to either the SP or the FP, so we use the
8034 least common denominator, e.g. SImode, and offsets from 0 to 64.
8035
8036 ??? Verify whether the above is the right approach.
8037
8038 ??? Also, the FP may be eliminated to the SP, so perhaps that
8039 needs special handling also.
8040
8041 ??? Look at how the mips16 port solves this problem. It probably uses
8042 better ways to solve some of these problems.
8043
8044 Although it is not incorrect, we don't accept QImode and HImode
8045 addresses based on the frame pointer or arg pointer until the
8046 reload pass starts. This is so that eliminating such addresses
8047 into stack based ones won't produce impossible code. */
8048 int
8049 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8050 {
8051 /* ??? Not clear if this is right. Experiment. */
8052 if (GET_MODE_SIZE (mode) < 4
8053 && !(reload_in_progress || reload_completed)
8054 && (reg_mentioned_p (frame_pointer_rtx, x)
8055 || reg_mentioned_p (arg_pointer_rtx, x)
8056 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8057 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8058 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8059 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8060 return 0;
8061
8062 /* Accept any base register. SP only in SImode or larger. */
8063 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8064 return 1;
8065
8066 /* This is PC relative data before arm_reorg runs. */
8067 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8068 && GET_CODE (x) == SYMBOL_REF
8069 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8070 return 1;
8071
8072 /* This is PC relative data after arm_reorg runs. */
8073 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8074 && reload_completed
8075 && (GET_CODE (x) == LABEL_REF
8076 || (GET_CODE (x) == CONST
8077 && GET_CODE (XEXP (x, 0)) == PLUS
8078 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8079 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8080 return 1;
8081
8082 /* Post-inc indexing only supported for SImode and larger. */
8083 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8084 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8085 return 1;
8086
8087 else if (GET_CODE (x) == PLUS)
8088 {
8089 /* REG+REG address can be any two index registers. */
8090 /* We disallow FRAME+REG addressing since we know that FRAME
8091 will be replaced with STACK, and SP relative addressing only
8092 permits SP+OFFSET. */
8093 if (GET_MODE_SIZE (mode) <= 4
8094 && XEXP (x, 0) != frame_pointer_rtx
8095 && XEXP (x, 1) != frame_pointer_rtx
8096 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8097 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8098 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8099 return 1;
8100
8101 /* REG+const has 5-7 bit offset for non-SP registers. */
8102 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8103 || XEXP (x, 0) == arg_pointer_rtx)
8104 && CONST_INT_P (XEXP (x, 1))
8105 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8106 return 1;
8107
8108 /* REG+const has 10-bit offset for SP, but only SImode and
8109 larger are supported. */
8110 /* ??? Should probably check for DI/DFmode overflow here
8111 just like GO_IF_LEGITIMATE_OFFSET does. */
8112 else if (REG_P (XEXP (x, 0))
8113 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8114 && GET_MODE_SIZE (mode) >= 4
8115 && CONST_INT_P (XEXP (x, 1))
8116 && INTVAL (XEXP (x, 1)) >= 0
8117 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8118 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8119 return 1;
8120
8121 else if (REG_P (XEXP (x, 0))
8122 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8123 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8124 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8125 && REGNO (XEXP (x, 0))
8126 <= LAST_VIRTUAL_POINTER_REGISTER))
8127 && GET_MODE_SIZE (mode) >= 4
8128 && CONST_INT_P (XEXP (x, 1))
8129 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8130 return 1;
8131 }
8132
8133 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8134 && GET_MODE_SIZE (mode) == 4
8135 && GET_CODE (x) == SYMBOL_REF
8136 && CONSTANT_POOL_ADDRESS_P (x)
8137 && ! (flag_pic
8138 && symbol_mentioned_p (get_pool_constant (x))
8139 && ! pcrel_constant_p (get_pool_constant (x))))
8140 return 1;
8141
8142 return 0;
8143 }
8144
8145 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8146 instruction of mode MODE. */
8147 int
8148 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8149 {
8150 switch (GET_MODE_SIZE (mode))
8151 {
8152 case 1:
8153 return val >= 0 && val < 32;
8154
8155 case 2:
8156 return val >= 0 && val < 64 && (val & 1) == 0;
8157
8158 default:
8159 return (val >= 0
8160 && (val + GET_MODE_SIZE (mode)) <= 128
8161 && (val & 3) == 0);
8162 }
8163 }
8164
8165 bool
8166 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8167 {
8168 if (TARGET_ARM)
8169 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8170 else if (TARGET_THUMB2)
8171 return thumb2_legitimate_address_p (mode, x, strict_p);
8172 else /* if (TARGET_THUMB1) */
8173 return thumb1_legitimate_address_p (mode, x, strict_p);
8174 }
8175
8176 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8177
8178 Given an rtx X being reloaded into a reg required to be
8179 in class CLASS, return the class of reg to actually use.
8180 In general this is just CLASS, but for the Thumb core registers and
8181 immediate constants we prefer a LO_REGS class or a subset. */
8182
8183 static reg_class_t
8184 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8185 {
8186 if (TARGET_32BIT)
8187 return rclass;
8188 else
8189 {
8190 if (rclass == GENERAL_REGS)
8191 return LO_REGS;
8192 else
8193 return rclass;
8194 }
8195 }
8196
8197 /* Build the SYMBOL_REF for __tls_get_addr. */
8198
8199 static GTY(()) rtx tls_get_addr_libfunc;
8200
8201 static rtx
8202 get_tls_get_addr (void)
8203 {
8204 if (!tls_get_addr_libfunc)
8205 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8206 return tls_get_addr_libfunc;
8207 }
8208
8209 rtx
8210 arm_load_tp (rtx target)
8211 {
8212 if (!target)
8213 target = gen_reg_rtx (SImode);
8214
8215 if (TARGET_HARD_TP)
8216 {
8217 /* Can return in any reg. */
8218 emit_insn (gen_load_tp_hard (target));
8219 }
8220 else
8221 {
8222 /* Always returned in r0. Immediately copy the result into a pseudo;
8223 otherwise other uses of r0 (e.g. setting up function arguments) may
8224 clobber the value. */
8225
8226 rtx tmp;
8227
8228 emit_insn (gen_load_tp_soft ());
8229
8230 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8231 emit_move_insn (target, tmp);
8232 }
8233 return target;
8234 }
8235
8236 static rtx
8237 load_tls_operand (rtx x, rtx reg)
8238 {
8239 rtx tmp;
8240
8241 if (reg == NULL_RTX)
8242 reg = gen_reg_rtx (SImode);
8243
8244 tmp = gen_rtx_CONST (SImode, x);
8245
8246 emit_move_insn (reg, tmp);
8247
8248 return reg;
8249 }
8250
8251 static rtx_insn *
8252 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8253 {
8254 rtx label, labelno, sum;
8255
8256 gcc_assert (reloc != TLS_DESCSEQ);
8257 start_sequence ();
8258
8259 labelno = GEN_INT (pic_labelno++);
8260 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8261 label = gen_rtx_CONST (VOIDmode, label);
8262
8263 sum = gen_rtx_UNSPEC (Pmode,
8264 gen_rtvec (4, x, GEN_INT (reloc), label,
8265 GEN_INT (TARGET_ARM ? 8 : 4)),
8266 UNSPEC_TLS);
8267 reg = load_tls_operand (sum, reg);
8268
8269 if (TARGET_ARM)
8270 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8271 else
8272 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8273
8274 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8275 LCT_PURE, /* LCT_CONST? */
8276 Pmode, 1, reg, Pmode);
8277
8278 rtx_insn *insns = get_insns ();
8279 end_sequence ();
8280
8281 return insns;
8282 }
8283
8284 static rtx
8285 arm_tls_descseq_addr (rtx x, rtx reg)
8286 {
8287 rtx labelno = GEN_INT (pic_labelno++);
8288 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8289 rtx sum = gen_rtx_UNSPEC (Pmode,
8290 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8291 gen_rtx_CONST (VOIDmode, label),
8292 GEN_INT (!TARGET_ARM)),
8293 UNSPEC_TLS);
8294 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8295
8296 emit_insn (gen_tlscall (x, labelno));
8297 if (!reg)
8298 reg = gen_reg_rtx (SImode);
8299 else
8300 gcc_assert (REGNO (reg) != R0_REGNUM);
8301
8302 emit_move_insn (reg, reg0);
8303
8304 return reg;
8305 }
8306
8307 rtx
8308 legitimize_tls_address (rtx x, rtx reg)
8309 {
8310 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8311 rtx_insn *insns;
8312 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8313
8314 switch (model)
8315 {
8316 case TLS_MODEL_GLOBAL_DYNAMIC:
8317 if (TARGET_GNU2_TLS)
8318 {
8319 reg = arm_tls_descseq_addr (x, reg);
8320
8321 tp = arm_load_tp (NULL_RTX);
8322
8323 dest = gen_rtx_PLUS (Pmode, tp, reg);
8324 }
8325 else
8326 {
8327 /* Original scheme */
8328 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8329 dest = gen_reg_rtx (Pmode);
8330 emit_libcall_block (insns, dest, ret, x);
8331 }
8332 return dest;
8333
8334 case TLS_MODEL_LOCAL_DYNAMIC:
8335 if (TARGET_GNU2_TLS)
8336 {
8337 reg = arm_tls_descseq_addr (x, reg);
8338
8339 tp = arm_load_tp (NULL_RTX);
8340
8341 dest = gen_rtx_PLUS (Pmode, tp, reg);
8342 }
8343 else
8344 {
8345 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8346
8347 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8348 share the LDM result with other LD model accesses. */
8349 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8350 UNSPEC_TLS);
8351 dest = gen_reg_rtx (Pmode);
8352 emit_libcall_block (insns, dest, ret, eqv);
8353
8354 /* Load the addend. */
8355 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8356 GEN_INT (TLS_LDO32)),
8357 UNSPEC_TLS);
8358 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8359 dest = gen_rtx_PLUS (Pmode, dest, addend);
8360 }
8361 return dest;
8362
8363 case TLS_MODEL_INITIAL_EXEC:
8364 labelno = GEN_INT (pic_labelno++);
8365 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8366 label = gen_rtx_CONST (VOIDmode, label);
8367 sum = gen_rtx_UNSPEC (Pmode,
8368 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8369 GEN_INT (TARGET_ARM ? 8 : 4)),
8370 UNSPEC_TLS);
8371 reg = load_tls_operand (sum, reg);
8372
8373 if (TARGET_ARM)
8374 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8375 else if (TARGET_THUMB2)
8376 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8377 else
8378 {
8379 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8380 emit_move_insn (reg, gen_const_mem (SImode, reg));
8381 }
8382
8383 tp = arm_load_tp (NULL_RTX);
8384
8385 return gen_rtx_PLUS (Pmode, tp, reg);
8386
8387 case TLS_MODEL_LOCAL_EXEC:
8388 tp = arm_load_tp (NULL_RTX);
8389
8390 reg = gen_rtx_UNSPEC (Pmode,
8391 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8392 UNSPEC_TLS);
8393 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8394
8395 return gen_rtx_PLUS (Pmode, tp, reg);
8396
8397 default:
8398 abort ();
8399 }
8400 }
8401
8402 /* Try machine-dependent ways of modifying an illegitimate address
8403 to be legitimate. If we find one, return the new, valid address. */
8404 rtx
8405 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8406 {
8407 if (arm_tls_referenced_p (x))
8408 {
8409 rtx addend = NULL;
8410
8411 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8412 {
8413 addend = XEXP (XEXP (x, 0), 1);
8414 x = XEXP (XEXP (x, 0), 0);
8415 }
8416
8417 if (GET_CODE (x) != SYMBOL_REF)
8418 return x;
8419
8420 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8421
8422 x = legitimize_tls_address (x, NULL_RTX);
8423
8424 if (addend)
8425 {
8426 x = gen_rtx_PLUS (SImode, x, addend);
8427 orig_x = x;
8428 }
8429 else
8430 return x;
8431 }
8432
8433 if (!TARGET_ARM)
8434 {
8435 /* TODO: legitimize_address for Thumb2. */
8436 if (TARGET_THUMB2)
8437 return x;
8438 return thumb_legitimize_address (x, orig_x, mode);
8439 }
8440
8441 if (GET_CODE (x) == PLUS)
8442 {
8443 rtx xop0 = XEXP (x, 0);
8444 rtx xop1 = XEXP (x, 1);
8445
8446 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8447 xop0 = force_reg (SImode, xop0);
8448
8449 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8450 && !symbol_mentioned_p (xop1))
8451 xop1 = force_reg (SImode, xop1);
8452
8453 if (ARM_BASE_REGISTER_RTX_P (xop0)
8454 && CONST_INT_P (xop1))
8455 {
8456 HOST_WIDE_INT n, low_n;
8457 rtx base_reg, val;
8458 n = INTVAL (xop1);
8459
8460 /* VFP addressing modes actually allow greater offsets, but for
8461 now we just stick with the lowest common denominator. */
8462 if (mode == DImode || mode == DFmode)
8463 {
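/* Keep only a small low part in the address itself. For example
   (illustrative arithmetic): n == 27 gives low_n == 11 and n == 16;
   since low_n > 4 it is rebalanced to n == 32 and low_n == -5, which
   still sums to the original offset.  */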
8464 low_n = n & 0x0f;
8465 n &= ~0x0f;
8466 if (low_n > 4)
8467 {
8468 n += 16;
8469 low_n -= 16;
8470 }
8471 }
8472 else
8473 {
8474 low_n = ((mode) == TImode ? 0
8475 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8476 n -= low_n;
8477 }
8478
8479 base_reg = gen_reg_rtx (SImode);
8480 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8481 emit_move_insn (base_reg, val);
8482 x = plus_constant (Pmode, base_reg, low_n);
8483 }
8484 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8485 x = gen_rtx_PLUS (SImode, xop0, xop1);
8486 }
8487
8488 /* XXX We don't allow MINUS any more -- see comment in
8489 arm_legitimate_address_outer_p (). */
8490 else if (GET_CODE (x) == MINUS)
8491 {
8492 rtx xop0 = XEXP (x, 0);
8493 rtx xop1 = XEXP (x, 1);
8494
8495 if (CONSTANT_P (xop0))
8496 xop0 = force_reg (SImode, xop0);
8497
8498 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8499 xop1 = force_reg (SImode, xop1);
8500
8501 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8502 x = gen_rtx_MINUS (SImode, xop0, xop1);
8503 }
8504
8505 /* Make sure to take full advantage of the pre-indexed addressing mode
8506 with absolute addresses, which often allows the base register to
8507 be factorized for multiple adjacent memory references, and might
8508 even allow the minipool to be avoided entirely. */
8509 else if (CONST_INT_P (x) && optimize > 0)
8510 {
8511 unsigned int bits;
8512 HOST_WIDE_INT mask, base, index;
8513 rtx base_reg;
8514
8515 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8516 use an 8-bit index. So let's use a 12-bit index for SImode only and
8517 hope that arm_gen_constant will enable ldrb to use more bits. */
8518 bits = (mode == SImode) ? 12 : 8;
8519 mask = (1 << bits) - 1;
8520 base = INTVAL (x) & ~mask;
8521 index = INTVAL (x) & mask;
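/* For example (illustrative): with mode == SImode, an absolute address of
   0x12345 uses bits == 12, so base becomes 0x12000 and index 0x345; the
   base is then loaded into a register once and can be shared by
   neighbouring accesses.  */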
8522 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8523 {
8524 /* It'll most probably be more efficient to generate the base
8525 with more bits set and use a negative index instead. */
8526 base |= mask;
8527 index -= mask;
8528 }
8529 base_reg = force_reg (SImode, GEN_INT (base));
8530 x = plus_constant (Pmode, base_reg, index);
8531 }
8532
8533 if (flag_pic)
8534 {
8535 /* We need to find and carefully transform any SYMBOL and LABEL
8536 references; so go back to the original address expression. */
8537 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8538
8539 if (new_x != orig_x)
8540 x = new_x;
8541 }
8542
8543 return x;
8544 }
8545
8546
8547 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8548 to be legitimate. If we find one, return the new, valid address. */
8549 rtx
8550 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8551 {
8552 if (GET_CODE (x) == PLUS
8553 && CONST_INT_P (XEXP (x, 1))
8554 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8555 || INTVAL (XEXP (x, 1)) < 0))
8556 {
8557 rtx xop0 = XEXP (x, 0);
8558 rtx xop1 = XEXP (x, 1);
8559 HOST_WIDE_INT offset = INTVAL (xop1);
8560
8561 /* Try to fold the offset into a biasing of the base register and
8562 then offsetting that. Don't do this when optimizing for space
8563 since it can cause too many CSEs. */
8564 if (optimize_size && offset >= 0
8565 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8566 {
8567 HOST_WIDE_INT delta;
8568
8569 if (offset >= 256)
8570 delta = offset - (256 - GET_MODE_SIZE (mode));
8571 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8572 delta = 31 * GET_MODE_SIZE (mode);
8573 else
8574 delta = offset & (~31 * GET_MODE_SIZE (mode));
8575
8576 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8577 NULL_RTX);
8578 x = plus_constant (Pmode, xop0, delta);
8579 }
8580 else if (offset < 0 && offset > -256)
8581 /* Small negative offsets are best done with a subtract before the
8582 dereference; forcing these into a register normally takes two
8583 instructions. */
8584 x = force_operand (x, NULL_RTX);
8585 else
8586 {
8587 /* For the remaining cases, force the constant into a register. */
8588 xop1 = force_reg (SImode, xop1);
8589 x = gen_rtx_PLUS (SImode, xop0, xop1);
8590 }
8591 }
8592 else if (GET_CODE (x) == PLUS
8593 && s_register_operand (XEXP (x, 1), SImode)
8594 && !s_register_operand (XEXP (x, 0), SImode))
8595 {
8596 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8597
8598 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8599 }
8600
8601 if (flag_pic)
8602 {
8603 /* We need to find and carefully transform any SYMBOL and LABEL
8604 references; so go back to the original address expression. */
8605 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8606
8607 if (new_x != orig_x)
8608 x = new_x;
8609 }
8610
8611 return x;
8612 }
8613
8614 /* Return TRUE if X contains any TLS symbol references. */
8615
8616 bool
8617 arm_tls_referenced_p (rtx x)
8618 {
8619 if (! TARGET_HAVE_TLS)
8620 return false;
8621
8622 subrtx_iterator::array_type array;
8623 FOR_EACH_SUBRTX (iter, array, x, ALL)
8624 {
8625 const_rtx x = *iter;
8626 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8627 return true;
8628
8629 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8630 TLS offsets, not real symbol references. */
8631 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8632 iter.skip_subrtxes ();
8633 }
8634 return false;
8635 }
8636
8637 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8638
8639 On the ARM, allow any integer (invalid ones are removed later by insn
8640 patterns), nice doubles and symbol_refs which refer to the function's
8641 constant pool XXX.
8642
8643 When generating PIC code, allow anything. */
8644
8645 static bool
8646 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8647 {
8648 return flag_pic || !label_mentioned_p (x);
8649 }
8650
8651 static bool
8652 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8653 {
8654 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8655 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8656 for ARMv8-M Baseline or later, the result is valid. */
8657 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8658 x = XEXP (x, 0);
8659
8660 return (CONST_INT_P (x)
8661 || CONST_DOUBLE_P (x)
8662 || CONSTANT_ADDRESS_P (x)
8663 || flag_pic);
8664 }
8665
8666 static bool
8667 arm_legitimate_constant_p (machine_mode mode, rtx x)
8668 {
8669 return (!arm_cannot_force_const_mem (mode, x)
8670 && (TARGET_32BIT
8671 ? arm_legitimate_constant_p_1 (mode, x)
8672 : thumb_legitimate_constant_p (mode, x)));
8673 }
8674
8675 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8676
8677 static bool
8678 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8679 {
8680 rtx base, offset;
8681
8682 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8683 {
8684 split_const (x, &base, &offset);
8685 if (GET_CODE (base) == SYMBOL_REF
8686 && !offset_within_block_p (base, INTVAL (offset)))
8687 return true;
8688 }
8689 return arm_tls_referenced_p (x);
8690 }
8691 \f
8692 #define REG_OR_SUBREG_REG(X) \
8693 (REG_P (X) \
8694 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8695
8696 #define REG_OR_SUBREG_RTX(X) \
8697 (REG_P (X) ? (X) : SUBREG_REG (X))
8698
8699 static inline int
8700 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8701 {
8702 machine_mode mode = GET_MODE (x);
8703 int total, words;
8704
8705 switch (code)
8706 {
8707 case ASHIFT:
8708 case ASHIFTRT:
8709 case LSHIFTRT:
8710 case ROTATERT:
8711 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8712
8713 case PLUS:
8714 case MINUS:
8715 case COMPARE:
8716 case NEG:
8717 case NOT:
8718 return COSTS_N_INSNS (1);
8719
8720 case MULT:
8721 if (CONST_INT_P (XEXP (x, 1)))
8722 {
8723 int cycles = 0;
8724 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8725
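/* Approximate an early-termination multiply: roughly one cycle per two
   bits of the constant, e.g. a 16-bit constant gives cycles == 8 and a
   total cost of COSTS_N_INSNS (2) + 8.  */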
8726 while (i)
8727 {
8728 i >>= 2;
8729 cycles++;
8730 }
8731 return COSTS_N_INSNS (2) + cycles;
8732 }
8733 return COSTS_N_INSNS (1) + 16;
8734
8735 case SET:
8736 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8737 the mode. */
8738 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8739 return (COSTS_N_INSNS (words)
8740 + 4 * ((MEM_P (SET_SRC (x)))
8741 + MEM_P (SET_DEST (x))));
8742
8743 case CONST_INT:
8744 if (outer == SET)
8745 {
8746 if (UINTVAL (x) < 256
8747 /* 16-bit constant. */
8748 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8749 return 0;
8750 if (thumb_shiftable_const (INTVAL (x)))
8751 return COSTS_N_INSNS (2);
8752 return COSTS_N_INSNS (3);
8753 }
8754 else if ((outer == PLUS || outer == COMPARE)
8755 && INTVAL (x) < 256 && INTVAL (x) > -256)
8756 return 0;
8757 else if ((outer == IOR || outer == XOR || outer == AND)
8758 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8759 return COSTS_N_INSNS (1);
8760 else if (outer == AND)
8761 {
8762 int i;
8763 /* This duplicates the tests in the andsi3 expander. */
8764 for (i = 9; i <= 31; i++)
8765 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8766 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8767 return COSTS_N_INSNS (2);
8768 }
8769 else if (outer == ASHIFT || outer == ASHIFTRT
8770 || outer == LSHIFTRT)
8771 return 0;
8772 return COSTS_N_INSNS (2);
8773
8774 case CONST:
8775 case CONST_DOUBLE:
8776 case LABEL_REF:
8777 case SYMBOL_REF:
8778 return COSTS_N_INSNS (3);
8779
8780 case UDIV:
8781 case UMOD:
8782 case DIV:
8783 case MOD:
8784 return 100;
8785
8786 case TRUNCATE:
8787 return 99;
8788
8789 case AND:
8790 case XOR:
8791 case IOR:
8792 /* XXX guess. */
8793 return 8;
8794
8795 case MEM:
8796 /* XXX another guess. */
8797 /* Memory costs quite a lot for the first word, but subsequent words
8798 load at the equivalent of a single insn each. */
8799 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8800 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8801 ? 4 : 0));
8802
8803 case IF_THEN_ELSE:
8804 /* XXX a guess. */
8805 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8806 return 14;
8807 return 2;
8808
8809 case SIGN_EXTEND:
8810 case ZERO_EXTEND:
8811 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8812 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8813
8814 if (mode == SImode)
8815 return total;
8816
8817 if (arm_arch6)
8818 return total + COSTS_N_INSNS (1);
8819
8820 /* Assume a two-shift sequence. Increase the cost slightly so
8821 we prefer actual shifts over an extend operation. */
8822 return total + 1 + COSTS_N_INSNS (2);
8823
8824 default:
8825 return 99;
8826 }
8827 }
8828
8829 /* Estimate the size cost of Thumb-1 instructions.
8830 For now most of the code is copied from thumb1_rtx_costs; we will need
8831 finer-grained tuning when we have more related test cases. */
8832 static inline int
8833 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8834 {
8835 machine_mode mode = GET_MODE (x);
8836 int words, cost;
8837
8838 switch (code)
8839 {
8840 case ASHIFT:
8841 case ASHIFTRT:
8842 case LSHIFTRT:
8843 case ROTATERT:
8844 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8845
8846 case PLUS:
8847 case MINUS:
8848 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8849 shiftsub1 patterns defined by RTL expansion, especially for the
8850 expansion of multiplication. */
8851 if ((GET_CODE (XEXP (x, 0)) == MULT
8852 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8853 || (GET_CODE (XEXP (x, 1)) == MULT
8854 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8855 return COSTS_N_INSNS (2);
8856 /* Fall through. */
8857 case COMPARE:
8858 case NEG:
8859 case NOT:
8860 return COSTS_N_INSNS (1);
8861
8862 case MULT:
8863 if (CONST_INT_P (XEXP (x, 1)))
8864 {
8865 /* The Thumb-1 MUL instruction cannot operate on a constant; we must
8866 load it into a register first. */
8867 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8868 /* For targets that have a very small and high-latency multiply unit,
8869 we prefer to synthesize the multiplication with up to 5 instructions,
8870 giving a good balance between size and performance. */
8871 if (arm_arch6m && arm_m_profile_small_mul)
8872 return COSTS_N_INSNS (5);
8873 else
8874 return COSTS_N_INSNS (1) + const_size;
8875 }
8876 return COSTS_N_INSNS (1);
8877
8878 case SET:
8879 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8880 the mode. */
8881 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8882 cost = COSTS_N_INSNS (words);
8883 if (satisfies_constraint_J (SET_SRC (x))
8884 || satisfies_constraint_K (SET_SRC (x))
8885 /* Too big an immediate for a 2-byte mov, using MOVT. */
8886 || (CONST_INT_P (SET_SRC (x))
8887 && UINTVAL (SET_SRC (x)) >= 256
8888 && TARGET_HAVE_MOVT
8889 && satisfies_constraint_j (SET_SRC (x)))
8890 /* thumb1_movdi_insn. */
8891 || ((words > 1) && MEM_P (SET_SRC (x))))
8892 cost += COSTS_N_INSNS (1);
8893 return cost;
8894
8895 case CONST_INT:
8896 if (outer == SET)
8897 {
8898 if (UINTVAL (x) < 256)
8899 return COSTS_N_INSNS (1);
8900 /* MOVW is 4 bytes long. */
8901 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8902 return COSTS_N_INSNS (2);
8903 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8904 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8905 return COSTS_N_INSNS (2);
8906 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8907 if (thumb_shiftable_const (INTVAL (x)))
8908 return COSTS_N_INSNS (2);
8909 return COSTS_N_INSNS (3);
8910 }
8911 else if ((outer == PLUS || outer == COMPARE)
8912 && INTVAL (x) < 256 && INTVAL (x) > -256)
8913 return 0;
8914 else if ((outer == IOR || outer == XOR || outer == AND)
8915 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8916 return COSTS_N_INSNS (1);
8917 else if (outer == AND)
8918 {
8919 int i;
8920 /* This duplicates the tests in the andsi3 expander. */
8921 for (i = 9; i <= 31; i++)
8922 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8923 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8924 return COSTS_N_INSNS (2);
8925 }
8926 else if (outer == ASHIFT || outer == ASHIFTRT
8927 || outer == LSHIFTRT)
8928 return 0;
8929 return COSTS_N_INSNS (2);
8930
8931 case CONST:
8932 case CONST_DOUBLE:
8933 case LABEL_REF:
8934 case SYMBOL_REF:
8935 return COSTS_N_INSNS (3);
8936
8937 case UDIV:
8938 case UMOD:
8939 case DIV:
8940 case MOD:
8941 return 100;
8942
8943 case TRUNCATE:
8944 return 99;
8945
8946 case AND:
8947 case XOR:
8948 case IOR:
8949 return COSTS_N_INSNS (1);
8950
8951 case MEM:
8952 return (COSTS_N_INSNS (1)
8953 + COSTS_N_INSNS (1)
8954 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8955 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8956 ? COSTS_N_INSNS (1) : 0));
8957
8958 case IF_THEN_ELSE:
8959 /* XXX a guess. */
8960 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8961 return 14;
8962 return 2;
8963
8964 case ZERO_EXTEND:
8965 /* XXX still guessing. */
8966 switch (GET_MODE (XEXP (x, 0)))
8967 {
8968 case QImode:
8969 return (1 + (mode == DImode ? 4 : 0)
8970 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8971
8972 case HImode:
8973 return (4 + (mode == DImode ? 4 : 0)
8974 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8975
8976 case SImode:
8977 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8978
8979 default:
8980 return 99;
8981 }
8982
8983 default:
8984 return 99;
8985 }
8986 }
8987
8988 /* Helper function for arm_rtx_costs.  If OP is a valid shifter operand,
8989 return the operand that is being shifted.  If the shift amount is not a
8990 constant, set *SHIFT_REG to point to the shift-amount operand.
8991 Return NULL if OP is not a shifter operand. */
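/* For example: (ashift (reg) (const_int 3)) returns the inner reg and leaves
   *SHIFT_REG untouched; (ashift (reg A) (reg B)) returns (reg A) and sets
   *SHIFT_REG to (reg B); (mult (reg) (const_int 8)) returns the reg, since a
   multiply by 8 is a left shift by 3; (plus (reg) (reg)) returns NULL.  */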
8992 static rtx
8993 shifter_op_p (rtx op, rtx *shift_reg)
8994 {
8995 enum rtx_code code = GET_CODE (op);
8996
8997 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8998 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8999 return XEXP (op, 0);
9000 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9001 return XEXP (op, 0);
9002 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9003 || code == ASHIFTRT)
9004 {
9005 if (!CONST_INT_P (XEXP (op, 1)))
9006 *shift_reg = XEXP (op, 1);
9007 return XEXP (op, 0);
9008 }
9009
9010 return NULL;
9011 }
9012
9013 static bool
9014 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9015 {
9016 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9017 rtx_code code = GET_CODE (x);
9018 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9019
9020 switch (XINT (x, 1))
9021 {
9022 case UNSPEC_UNALIGNED_LOAD:
9023 /* We can only do unaligned loads into the integer unit, and we can't
9024 use LDM or LDRD. */
9025 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9026 if (speed_p)
9027 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9028 + extra_cost->ldst.load_unaligned);
9029
9030 #ifdef NOT_YET
9031 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9032 ADDR_SPACE_GENERIC, speed_p);
9033 #endif
9034 return true;
9035
9036 case UNSPEC_UNALIGNED_STORE:
9037 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9038 if (speed_p)
9039 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9040 + extra_cost->ldst.store_unaligned);
9041
9042 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9043 #ifdef NOT_YET
9044 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9045 ADDR_SPACE_GENERIC, speed_p);
9046 #endif
9047 return true;
9048
9049 case UNSPEC_VRINTZ:
9050 case UNSPEC_VRINTP:
9051 case UNSPEC_VRINTM:
9052 case UNSPEC_VRINTR:
9053 case UNSPEC_VRINTX:
9054 case UNSPEC_VRINTA:
9055 if (speed_p)
9056 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9057
9058 return true;
9059 default:
9060 *cost = COSTS_N_INSNS (2);
9061 break;
9062 }
9063 return true;
9064 }
9065
9066 /* Cost of a libcall. We assume one insn per argument, an amount for the
9067 call (one insn for -Os) and then one for processing the result. */
9068 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
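/* For example, LIBCALL_COST (2) for a two-argument libcall evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4) when
   optimizing for size.  */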
9069
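/* Cost a narrow (sub-SImode) PLUS or MINUS whose operand number IDX of X is
   a left shift (or an equivalent MULT by a power of two): add the
   shift-with-arithmetic costs plus the costs of both sub-operands, then
   return from the enclosing cost function.  Relies on the locals x, cost,
   extra_cost, speed_p, shift_op and shift_reg being in scope at the
   expansion point.  */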
9070 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9071 do \
9072 { \
9073 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9074 if (shift_op != NULL \
9075 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9076 { \
9077 if (shift_reg) \
9078 { \
9079 if (speed_p) \
9080 *cost += extra_cost->alu.arith_shift_reg; \
9081 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9082 ASHIFT, 1, speed_p); \
9083 } \
9084 else if (speed_p) \
9085 *cost += extra_cost->alu.arith_shift; \
9086 \
9087 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9088 ASHIFT, 0, speed_p) \
9089 + rtx_cost (XEXP (x, 1 - IDX), \
9090 GET_MODE (shift_op), \
9091 OP, 1, speed_p)); \
9092 return true; \
9093 } \
9094 } \
9095 while (0);
9096
9097 /* RTX costs. Make an estimate of the cost of executing the operation
9098 X, which is contained within an operation with code OUTER_CODE.
9099 SPEED_P indicates whether the cost desired is the performance cost,
9100 or the size cost. The estimate is stored in COST and the return
9101 value is TRUE if the cost calculation is final, or FALSE if the
9102 caller should recurse through the operands of X to add additional
9103 costs.
9104
9105 We currently make no attempt to model the size savings of Thumb-2
9106 16-bit instructions. At the normal points in compilation where
9107 this code is called we have no measure of whether the condition
9108 flags are live or not, and thus no realistic way to determine what
9109 the size will eventually be. */
9110 static bool
9111 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9112 const struct cpu_cost_table *extra_cost,
9113 int *cost, bool speed_p)
9114 {
9115 machine_mode mode = GET_MODE (x);
9116
9117 *cost = COSTS_N_INSNS (1);
9118
9119 if (TARGET_THUMB1)
9120 {
9121 if (speed_p)
9122 *cost = thumb1_rtx_costs (x, code, outer_code);
9123 else
9124 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9125 return true;
9126 }
9127
9128 switch (code)
9129 {
9130 case SET:
9131 *cost = 0;
9132 /* SET RTXs don't have a mode so we get it from the destination. */
9133 mode = GET_MODE (SET_DEST (x));
9134
9135 if (REG_P (SET_SRC (x))
9136 && REG_P (SET_DEST (x)))
9137 {
9138 /* Assume that most copies can be done with a single insn,
9139 unless we don't have HW FP, in which case everything
9140 larger than word mode will require two insns. */
9141 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9142 && GET_MODE_SIZE (mode) > 4)
9143 || mode == DImode)
9144 ? 2 : 1);
9145 /* Conditional register moves can be encoded
9146 in 16 bits in Thumb mode. */
9147 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9148 *cost >>= 1;
9149
9150 return true;
9151 }
9152
9153 if (CONST_INT_P (SET_SRC (x)))
9154 {
9155 /* Handle CONST_INT here, since the value doesn't have a mode
9156 and we would otherwise be unable to work out the true cost. */
9157 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9158 0, speed_p);
9159 outer_code = SET;
9160 /* Slightly lower the cost of setting a core reg to a constant.
9161 This helps break up chains and allows for better scheduling. */
9162 if (REG_P (SET_DEST (x))
9163 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9164 *cost -= 1;
9165 x = SET_SRC (x);
9166 /* Immediate moves with an immediate in the range [0, 255] can be
9167 encoded in 16 bits in Thumb mode. */
9168 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9169 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9170 *cost >>= 1;
9171 goto const_int_cost;
9172 }
9173
9174 return false;
9175
9176 case MEM:
9177 /* A memory access costs one insn if the mode is small or the address is
9178 a single register; otherwise it costs one insn per word. */
9179 if (REG_P (XEXP (x, 0)))
9180 *cost = COSTS_N_INSNS (1);
9181 else if (flag_pic
9182 && GET_CODE (XEXP (x, 0)) == PLUS
9183 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9184 /* This will be split into two instructions.
9185 See arm.md:calculate_pic_address. */
9186 *cost = COSTS_N_INSNS (2);
9187 else
9188 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9189
9190 /* For speed optimizations, add the costs of the address and
9191 accessing memory. */
9192 if (speed_p)
9193 #ifdef NOT_YET
9194 *cost += (extra_cost->ldst.load
9195 + arm_address_cost (XEXP (x, 0), mode,
9196 ADDR_SPACE_GENERIC, speed_p));
9197 #else
9198 *cost += extra_cost->ldst.load;
9199 #endif
9200 return true;
9201
9202 case PARALLEL:
9203 {
9204 /* Calculations of LDM costs are complex. We assume an initial cost
9205 (ldm_1st) which will load the number of registers mentioned in
9206 ldm_regs_per_insn_1st registers; then each additional
9207 ldm_regs_per_insn_subsequent registers cost one more insn. The
9208 formula for N regs is thus:
9209
9210 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9211 + ldm_regs_per_insn_subsequent - 1)
9212 / ldm_regs_per_insn_subsequent).
9213
9214 Additional costs may also be added for addressing. A similar
9215 formula is used for STM. */
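/* For example, with ldm_regs_per_insn_1st == 1 and
   ldm_regs_per_insn_subsequent == 2 (hypothetical tuning values), a
   5-register LDM costs ldm_1st + COSTS_N_INSNS ((4 + 2 - 1) / 2),
   i.e. ldm_1st + COSTS_N_INSNS (2).  */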
9216
9217 bool is_ldm = load_multiple_operation (x, SImode);
9218 bool is_stm = store_multiple_operation (x, SImode);
9219
9220 if (is_ldm || is_stm)
9221 {
9222 if (speed_p)
9223 {
9224 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9225 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9226 ? extra_cost->ldst.ldm_regs_per_insn_1st
9227 : extra_cost->ldst.stm_regs_per_insn_1st;
9228 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9229 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9230 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9231
9232 *cost += regs_per_insn_1st
9233 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9234 + regs_per_insn_sub - 1)
9235 / regs_per_insn_sub);
9236 return true;
9237 }
9238
9239 }
9240 return false;
9241 }
9242 case DIV:
9243 case UDIV:
9244 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9245 && (mode == SFmode || !TARGET_VFP_SINGLE))
9246 *cost += COSTS_N_INSNS (speed_p
9247 ? extra_cost->fp[mode != SFmode].div : 0);
9248 else if (mode == SImode && TARGET_IDIV)
9249 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9250 else
9251 *cost = LIBCALL_COST (2);
9252 return false; /* All arguments must be in registers. */
9253
9254 case MOD:
9255 /* MOD by a power of 2 can be expanded as:
9256 rsbs r1, r0, #0
9257 and r0, r0, #(n - 1)
9258 and r1, r1, #(n - 1)
9259 rsbpl r0, r1, #0. */
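/* For example, with n == 8 and r0 == -13: r1 becomes 13, the ANDs give
   r0 = -13 & 7 = 3 and r1 = 13 & 7 = 5, and since -r0 >= 0 the final RSBPL
   yields r0 = -5, matching -13 % 8 == -5 in C.  For a positive r0 the RSBPL
   is not executed and r0 & (n - 1) is already the result.  */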
9260 if (CONST_INT_P (XEXP (x, 1))
9261 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9262 && mode == SImode)
9263 {
9264 *cost += COSTS_N_INSNS (3);
9265
9266 if (speed_p)
9267 *cost += 2 * extra_cost->alu.logical
9268 + extra_cost->alu.arith;
9269 return true;
9270 }
9271
9272 /* Fall-through. */
9273 case UMOD:
9274 *cost = LIBCALL_COST (2);
9275 return false; /* All arguments must be in registers. */
9276
9277 case ROTATE:
9278 if (mode == SImode && REG_P (XEXP (x, 1)))
9279 {
9280 *cost += (COSTS_N_INSNS (1)
9281 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9282 if (speed_p)
9283 *cost += extra_cost->alu.shift_reg;
9284 return true;
9285 }
9286 /* Fall through */
9287 case ROTATERT:
9288 case ASHIFT:
9289 case LSHIFTRT:
9290 case ASHIFTRT:
9291 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9292 {
9293 *cost += (COSTS_N_INSNS (2)
9294 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9295 if (speed_p)
9296 *cost += 2 * extra_cost->alu.shift;
9297 return true;
9298 }
9299 else if (mode == SImode)
9300 {
9301 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9302 /* Slightly disparage register shifts at -Os, but not by much. */
9303 if (!CONST_INT_P (XEXP (x, 1)))
9304 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9305 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9306 return true;
9307 }
9308 else if (GET_MODE_CLASS (mode) == MODE_INT
9309 && GET_MODE_SIZE (mode) < 4)
9310 {
9311 if (code == ASHIFT)
9312 {
9313 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9314 /* Slightly disparage register shifts at -Os, but not by
9315 much. */
9316 if (!CONST_INT_P (XEXP (x, 1)))
9317 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9318 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9319 }
9320 else if (code == LSHIFTRT || code == ASHIFTRT)
9321 {
9322 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9323 {
9324 /* Can use SBFX/UBFX. */
9325 if (speed_p)
9326 *cost += extra_cost->alu.bfx;
9327 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9328 }
9329 else
9330 {
9331 *cost += COSTS_N_INSNS (1);
9332 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9333 if (speed_p)
9334 {
9335 if (CONST_INT_P (XEXP (x, 1)))
9336 *cost += 2 * extra_cost->alu.shift;
9337 else
9338 *cost += (extra_cost->alu.shift
9339 + extra_cost->alu.shift_reg);
9340 }
9341 else
9342 /* Slightly disparage register shifts. */
9343 *cost += !CONST_INT_P (XEXP (x, 1));
9344 }
9345 }
9346 else /* Rotates. */
9347 {
9348 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9349 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9350 if (speed_p)
9351 {
9352 if (CONST_INT_P (XEXP (x, 1)))
9353 *cost += (2 * extra_cost->alu.shift
9354 + extra_cost->alu.log_shift);
9355 else
9356 *cost += (extra_cost->alu.shift
9357 + extra_cost->alu.shift_reg
9358 + extra_cost->alu.log_shift_reg);
9359 }
9360 }
9361 return true;
9362 }
9363
9364 *cost = LIBCALL_COST (2);
9365 return false;
9366
9367 case BSWAP:
9368 if (arm_arch6)
9369 {
9370 if (mode == SImode)
9371 {
9372 if (speed_p)
9373 *cost += extra_cost->alu.rev;
9374
9375 return false;
9376 }
9377 }
9378 else
9379 {
9380 /* No rev instruction available. Look at arm_legacy_rev
9381 and thumb_legacy_rev for the form of RTL used then. */
9382 if (TARGET_THUMB)
9383 {
9384 *cost += COSTS_N_INSNS (9);
9385
9386 if (speed_p)
9387 {
9388 *cost += 6 * extra_cost->alu.shift;
9389 *cost += 3 * extra_cost->alu.logical;
9390 }
9391 }
9392 else
9393 {
9394 *cost += COSTS_N_INSNS (4);
9395
9396 if (speed_p)
9397 {
9398 *cost += 2 * extra_cost->alu.shift;
9399 *cost += extra_cost->alu.arith_shift;
9400 *cost += 2 * extra_cost->alu.logical;
9401 }
9402 }
9403 return true;
9404 }
9405 return false;
9406
9407 case MINUS:
9408 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9409 && (mode == SFmode || !TARGET_VFP_SINGLE))
9410 {
9411 if (GET_CODE (XEXP (x, 0)) == MULT
9412 || GET_CODE (XEXP (x, 1)) == MULT)
9413 {
9414 rtx mul_op0, mul_op1, sub_op;
9415
9416 if (speed_p)
9417 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9418
9419 if (GET_CODE (XEXP (x, 0)) == MULT)
9420 {
9421 mul_op0 = XEXP (XEXP (x, 0), 0);
9422 mul_op1 = XEXP (XEXP (x, 0), 1);
9423 sub_op = XEXP (x, 1);
9424 }
9425 else
9426 {
9427 mul_op0 = XEXP (XEXP (x, 1), 0);
9428 mul_op1 = XEXP (XEXP (x, 1), 1);
9429 sub_op = XEXP (x, 0);
9430 }
9431
9432 /* The first operand of the multiply may be optionally
9433 negated. */
9434 if (GET_CODE (mul_op0) == NEG)
9435 mul_op0 = XEXP (mul_op0, 0);
9436
9437 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9438 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9439 + rtx_cost (sub_op, mode, code, 0, speed_p));
9440
9441 return true;
9442 }
9443
9444 if (speed_p)
9445 *cost += extra_cost->fp[mode != SFmode].addsub;
9446 return false;
9447 }
9448
9449 if (mode == SImode)
9450 {
9451 rtx shift_by_reg = NULL;
9452 rtx shift_op;
9453 rtx non_shift_op;
9454
9455 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9456 if (shift_op == NULL)
9457 {
9458 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9459 non_shift_op = XEXP (x, 0);
9460 }
9461 else
9462 non_shift_op = XEXP (x, 1);
9463
9464 if (shift_op != NULL)
9465 {
9466 if (shift_by_reg != NULL)
9467 {
9468 if (speed_p)
9469 *cost += extra_cost->alu.arith_shift_reg;
9470 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9471 }
9472 else if (speed_p)
9473 *cost += extra_cost->alu.arith_shift;
9474
9475 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9476 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9477 return true;
9478 }
9479
9480 if (arm_arch_thumb2
9481 && GET_CODE (XEXP (x, 1)) == MULT)
9482 {
9483 /* MLS. */
9484 if (speed_p)
9485 *cost += extra_cost->mult[0].add;
9486 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9487 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9488 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9489 return true;
9490 }
9491
9492 if (CONST_INT_P (XEXP (x, 0)))
9493 {
9494 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9495 INTVAL (XEXP (x, 0)), NULL_RTX,
9496 NULL_RTX, 1, 0);
9497 *cost = COSTS_N_INSNS (insns);
9498 if (speed_p)
9499 *cost += insns * extra_cost->alu.arith;
9500 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9501 return true;
9502 }
9503 else if (speed_p)
9504 *cost += extra_cost->alu.arith;
9505
9506 return false;
9507 }
9508
9509 if (GET_MODE_CLASS (mode) == MODE_INT
9510 && GET_MODE_SIZE (mode) < 4)
9511 {
9512 rtx shift_op, shift_reg;
9513 shift_reg = NULL;
9514
9515 /* We check both sides of the MINUS for shifter operands since,
9516 unlike PLUS, it's not commutative. */
9517
9518 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9519 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9520
9521 /* Slightly disparage, as we might need to widen the result. */
9522 *cost += 1;
9523 if (speed_p)
9524 *cost += extra_cost->alu.arith;
9525
9526 if (CONST_INT_P (XEXP (x, 0)))
9527 {
9528 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9529 return true;
9530 }
9531
9532 return false;
9533 }
9534
9535 if (mode == DImode)
9536 {
9537 *cost += COSTS_N_INSNS (1);
9538
9539 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9540 {
9541 rtx op1 = XEXP (x, 1);
9542
9543 if (speed_p)
9544 *cost += 2 * extra_cost->alu.arith;
9545
9546 if (GET_CODE (op1) == ZERO_EXTEND)
9547 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9548 0, speed_p);
9549 else
9550 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9551 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9552 0, speed_p);
9553 return true;
9554 }
9555 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9556 {
9557 if (speed_p)
9558 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9559 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9560 0, speed_p)
9561 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9562 return true;
9563 }
9564 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9565 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9566 {
9567 if (speed_p)
9568 *cost += (extra_cost->alu.arith
9569 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9570 ? extra_cost->alu.arith
9571 : extra_cost->alu.arith_shift));
9572 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9573 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9574 GET_CODE (XEXP (x, 1)), 0, speed_p));
9575 return true;
9576 }
9577
9578 if (speed_p)
9579 *cost += 2 * extra_cost->alu.arith;
9580 return false;
9581 }
9582
9583 /* Vector mode? */
9584
9585 *cost = LIBCALL_COST (2);
9586 return false;
9587
9588 case PLUS:
9589 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9590 && (mode == SFmode || !TARGET_VFP_SINGLE))
9591 {
9592 if (GET_CODE (XEXP (x, 0)) == MULT)
9593 {
9594 rtx mul_op0, mul_op1, add_op;
9595
9596 if (speed_p)
9597 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9598
9599 mul_op0 = XEXP (XEXP (x, 0), 0);
9600 mul_op1 = XEXP (XEXP (x, 0), 1);
9601 add_op = XEXP (x, 1);
9602
9603 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9604 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9605 + rtx_cost (add_op, mode, code, 0, speed_p));
9606
9607 return true;
9608 }
9609
9610 if (speed_p)
9611 *cost += extra_cost->fp[mode != SFmode].addsub;
9612 return false;
9613 }
9614 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9615 {
9616 *cost = LIBCALL_COST (2);
9617 return false;
9618 }
9619
9620 /* Narrow modes can be synthesized in SImode, but the range
9621 of useful sub-operations is limited. Check for shift operations
9622 on one of the operands. Only left shifts can be used in the
9623 narrow modes. */
9624 if (GET_MODE_CLASS (mode) == MODE_INT
9625 && GET_MODE_SIZE (mode) < 4)
9626 {
9627 rtx shift_op, shift_reg;
9628 shift_reg = NULL;
9629
9630 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9631
9632 if (CONST_INT_P (XEXP (x, 1)))
9633 {
9634 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9635 INTVAL (XEXP (x, 1)), NULL_RTX,
9636 NULL_RTX, 1, 0);
9637 *cost = COSTS_N_INSNS (insns);
9638 if (speed_p)
9639 *cost += insns * extra_cost->alu.arith;
9640 /* Slightly penalize a narrow operation as the result may
9641 need widening. */
9642 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9643 return true;
9644 }
9645
9646 /* Slightly penalize a narrow operation as the result may
9647 need widening. */
9648 *cost += 1;
9649 if (speed_p)
9650 *cost += extra_cost->alu.arith;
9651
9652 return false;
9653 }
9654
9655 if (mode == SImode)
9656 {
9657 rtx shift_op, shift_reg;
9658
9659 if (TARGET_INT_SIMD
9660 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9661 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9662 {
9663 /* UXTA[BH] or SXTA[BH]. */
9664 if (speed_p)
9665 *cost += extra_cost->alu.extend_arith;
9666 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9667 0, speed_p)
9668 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9669 return true;
9670 }
9671
9672 shift_reg = NULL;
9673 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9674 if (shift_op != NULL)
9675 {
9676 if (shift_reg)
9677 {
9678 if (speed_p)
9679 *cost += extra_cost->alu.arith_shift_reg;
9680 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9681 }
9682 else if (speed_p)
9683 *cost += extra_cost->alu.arith_shift;
9684
9685 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9686 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9687 return true;
9688 }
9689 if (GET_CODE (XEXP (x, 0)) == MULT)
9690 {
9691 rtx mul_op = XEXP (x, 0);
9692
9693 if (TARGET_DSP_MULTIPLY
9694 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9695 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9696 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9697 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9698 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9699 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9700 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9701 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9702 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9703 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9704 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9705 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9706 == 16))))))
9707 {
9708 /* SMLA[BT][BT]. */
9709 if (speed_p)
9710 *cost += extra_cost->mult[0].extend_add;
9711 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9712 SIGN_EXTEND, 0, speed_p)
9713 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9714 SIGN_EXTEND, 0, speed_p)
9715 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9716 return true;
9717 }
9718
9719 if (speed_p)
9720 *cost += extra_cost->mult[0].add;
9721 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9722 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9723 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9724 return true;
9725 }
9726 if (CONST_INT_P (XEXP (x, 1)))
9727 {
9728 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9729 INTVAL (XEXP (x, 1)), NULL_RTX,
9730 NULL_RTX, 1, 0);
9731 *cost = COSTS_N_INSNS (insns);
9732 if (speed_p)
9733 *cost += insns * extra_cost->alu.arith;
9734 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9735 return true;
9736 }
9737 else if (speed_p)
9738 *cost += extra_cost->alu.arith;
9739
9740 return false;
9741 }
9742
9743 if (mode == DImode)
9744 {
9745 if (arm_arch3m
9746 && GET_CODE (XEXP (x, 0)) == MULT
9747 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9748 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9749 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9750 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9751 {
9752 if (speed_p)
9753 *cost += extra_cost->mult[1].extend_add;
9754 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9755 ZERO_EXTEND, 0, speed_p)
9756 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9757 ZERO_EXTEND, 0, speed_p)
9758 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9759 return true;
9760 }
9761
9762 *cost += COSTS_N_INSNS (1);
9763
9764 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9765 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9766 {
9767 if (speed_p)
9768 *cost += (extra_cost->alu.arith
9769 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9770 ? extra_cost->alu.arith
9771 : extra_cost->alu.arith_shift));
9772
9773 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9774 0, speed_p)
9775 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9776 return true;
9777 }
9778
9779 if (speed_p)
9780 *cost += 2 * extra_cost->alu.arith;
9781 return false;
9782 }
9783
9784 /* Vector mode? */
9785 *cost = LIBCALL_COST (2);
9786 return false;
9787 case IOR:
9788 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9789 {
9790 if (speed_p)
9791 *cost += extra_cost->alu.rev;
9792
9793 return true;
9794 }
9795 /* Fall through. */
9796 case AND: case XOR:
9797 if (mode == SImode)
9798 {
9799 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9800 rtx op0 = XEXP (x, 0);
9801 rtx shift_op, shift_reg;
9802
9803 if (subcode == NOT
9804 && (code == AND
9805 || (code == IOR && TARGET_THUMB2)))
9806 op0 = XEXP (op0, 0);
9807
9808 shift_reg = NULL;
9809 shift_op = shifter_op_p (op0, &shift_reg);
9810 if (shift_op != NULL)
9811 {
9812 if (shift_reg)
9813 {
9814 if (speed_p)
9815 *cost += extra_cost->alu.log_shift_reg;
9816 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9817 }
9818 else if (speed_p)
9819 *cost += extra_cost->alu.log_shift;
9820
9821 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9822 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9823 return true;
9824 }
9825
9826 if (CONST_INT_P (XEXP (x, 1)))
9827 {
9828 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9829 INTVAL (XEXP (x, 1)), NULL_RTX,
9830 NULL_RTX, 1, 0);
9831
9832 *cost = COSTS_N_INSNS (insns);
9833 if (speed_p)
9834 *cost += insns * extra_cost->alu.logical;
9835 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9836 return true;
9837 }
9838
9839 if (speed_p)
9840 *cost += extra_cost->alu.logical;
9841 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9842 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9843 return true;
9844 }
9845
9846 if (mode == DImode)
9847 {
9848 rtx op0 = XEXP (x, 0);
9849 enum rtx_code subcode = GET_CODE (op0);
9850
9851 *cost += COSTS_N_INSNS (1);
9852
9853 if (subcode == NOT
9854 && (code == AND
9855 || (code == IOR && TARGET_THUMB2)))
9856 op0 = XEXP (op0, 0);
9857
9858 if (GET_CODE (op0) == ZERO_EXTEND)
9859 {
9860 if (speed_p)
9861 *cost += 2 * extra_cost->alu.logical;
9862
9863 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9864 0, speed_p)
9865 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9866 return true;
9867 }
9868 else if (GET_CODE (op0) == SIGN_EXTEND)
9869 {
9870 if (speed_p)
9871 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9872
9873 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9874 0, speed_p)
9875 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9876 return true;
9877 }
9878
9879 if (speed_p)
9880 *cost += 2 * extra_cost->alu.logical;
9881
9882 return true;
9883 }
9884 /* Vector mode? */
9885
9886 *cost = LIBCALL_COST (2);
9887 return false;
9888
9889 case MULT:
9890 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9891 && (mode == SFmode || !TARGET_VFP_SINGLE))
9892 {
9893 rtx op0 = XEXP (x, 0);
9894
9895 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9896 op0 = XEXP (op0, 0);
9897
9898 if (speed_p)
9899 *cost += extra_cost->fp[mode != SFmode].mult;
9900
9901 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9902 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9903 return true;
9904 }
9905 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9906 {
9907 *cost = LIBCALL_COST (2);
9908 return false;
9909 }
9910
9911 if (mode == SImode)
9912 {
9913 if (TARGET_DSP_MULTIPLY
9914 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9915 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9916 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9917 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9918 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9919 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9920 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9921 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9922 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9923 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9924 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9925 && (INTVAL (XEXP (XEXP (x, 1), 1))
9926 == 16))))))
9927 {
9928 /* SMUL[TB][TB]. */
9929 if (speed_p)
9930 *cost += extra_cost->mult[0].extend;
9931 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9932 SIGN_EXTEND, 0, speed_p);
9933 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9934 SIGN_EXTEND, 1, speed_p);
9935 return true;
9936 }
9937 if (speed_p)
9938 *cost += extra_cost->mult[0].simple;
9939 return false;
9940 }
9941
9942 if (mode == DImode)
9943 {
9944 if (arm_arch3m
9945 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9946 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9947 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9948 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9949 {
9950 if (speed_p)
9951 *cost += extra_cost->mult[1].extend;
9952 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
9953 ZERO_EXTEND, 0, speed_p)
9954 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9955 ZERO_EXTEND, 0, speed_p));
9956 return true;
9957 }
9958
9959 *cost = LIBCALL_COST (2);
9960 return false;
9961 }
9962
9963 /* Vector mode? */
9964 *cost = LIBCALL_COST (2);
9965 return false;
9966
9967 case NEG:
9968 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9969 && (mode == SFmode || !TARGET_VFP_SINGLE))
9970 {
9971 if (GET_CODE (XEXP (x, 0)) == MULT)
9972 {
9973 /* VNMUL. */
9974 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
9975 return true;
9976 }
9977
9978 if (speed_p)
9979 *cost += extra_cost->fp[mode != SFmode].neg;
9980
9981 return false;
9982 }
9983 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9984 {
9985 *cost = LIBCALL_COST (1);
9986 return false;
9987 }
9988
9989 if (mode == SImode)
9990 {
9991 if (GET_CODE (XEXP (x, 0)) == ABS)
9992 {
9993 *cost += COSTS_N_INSNS (1);
9994 /* Assume the non-flag-changing variant. */
9995 if (speed_p)
9996 *cost += (extra_cost->alu.log_shift
9997 + extra_cost->alu.arith_shift);
9998 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
9999 return true;
10000 }
10001
10002 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10003 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10004 {
10005 *cost += COSTS_N_INSNS (1);
10006 /* No extra cost for MOV imm and MVN imm. */
10007 /* If the comparison op is using the flags, there's no further
10008 cost; otherwise we need to add the cost of the comparison. */
10009 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10010 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10011 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10012 {
10013 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10014 *cost += (COSTS_N_INSNS (1)
10015 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10016 0, speed_p)
10017 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10018 1, speed_p));
10019 if (speed_p)
10020 *cost += extra_cost->alu.arith;
10021 }
10022 return true;
10023 }
10024
10025 if (speed_p)
10026 *cost += extra_cost->alu.arith;
10027 return false;
10028 }
10029
10030 if (GET_MODE_CLASS (mode) == MODE_INT
10031 && GET_MODE_SIZE (mode) < 4)
10032 {
10033 /* Slightly disparage, as we might need an extend operation. */
10034 *cost += 1;
10035 if (speed_p)
10036 *cost += extra_cost->alu.arith;
10037 return false;
10038 }
10039
10040 if (mode == DImode)
10041 {
10042 *cost += COSTS_N_INSNS (1);
10043 if (speed_p)
10044 *cost += 2 * extra_cost->alu.arith;
10045 return false;
10046 }
10047
10048 /* Vector mode? */
10049 *cost = LIBCALL_COST (1);
10050 return false;
10051
10052 case NOT:
10053 if (mode == SImode)
10054 {
10055 rtx shift_op;
10056 rtx shift_reg = NULL;
10057
10058 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10059
10060 if (shift_op)
10061 {
10062 if (shift_reg != NULL)
10063 {
10064 if (speed_p)
10065 *cost += extra_cost->alu.log_shift_reg;
10066 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10067 }
10068 else if (speed_p)
10069 *cost += extra_cost->alu.log_shift;
10070 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10071 return true;
10072 }
10073
10074 if (speed_p)
10075 *cost += extra_cost->alu.logical;
10076 return false;
10077 }
10078 if (mode == DImode)
10079 {
10080 *cost += COSTS_N_INSNS (1);
10081 return false;
10082 }
10083
10084 /* Vector mode? */
10085
10086 *cost += LIBCALL_COST (1);
10087 return false;
10088
10089 case IF_THEN_ELSE:
10090 {
10091 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10092 {
10093 *cost += COSTS_N_INSNS (3);
10094 return true;
10095 }
10096 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10097 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10098
10099 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10100 /* Assume that if one arm of the if_then_else is a register,
10101 it will be tied to the result and the
10102 conditional insn eliminated. */
10103 if (REG_P (XEXP (x, 1)))
10104 *cost += op2cost;
10105 else if (REG_P (XEXP (x, 2)))
10106 *cost += op1cost;
10107 else
10108 {
10109 if (speed_p)
10110 {
10111 if (extra_cost->alu.non_exec_costs_exec)
10112 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10113 else
10114 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10115 }
10116 else
10117 *cost += op1cost + op2cost;
10118 }
10119 }
10120 return true;
10121
10122 case COMPARE:
10123 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10124 *cost = 0;
10125 else
10126 {
10127 machine_mode op0mode;
10128 /* We'll mostly assume that the cost of a compare is the cost of the
10129 LHS. However, there are some notable exceptions. */
10130
10131 /* Floating point compares are never done as side-effects. */
10132 op0mode = GET_MODE (XEXP (x, 0));
10133 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10134 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10135 {
10136 if (speed_p)
10137 *cost += extra_cost->fp[op0mode != SFmode].compare;
10138
10139 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10140 {
10141 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10142 return true;
10143 }
10144
10145 return false;
10146 }
10147 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10148 {
10149 *cost = LIBCALL_COST (2);
10150 return false;
10151 }
10152
10153 /* DImode compares normally take two insns. */
10154 if (op0mode == DImode)
10155 {
10156 *cost += COSTS_N_INSNS (1);
10157 if (speed_p)
10158 *cost += 2 * extra_cost->alu.arith;
10159 return false;
10160 }
10161
10162 if (op0mode == SImode)
10163 {
10164 rtx shift_op;
10165 rtx shift_reg;
10166
10167 if (XEXP (x, 1) == const0_rtx
10168 && !(REG_P (XEXP (x, 0))
10169 || (GET_CODE (XEXP (x, 0)) == SUBREG
10170 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10171 {
10172 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10173
10174 /* Multiply operations that set the flags are often
10175 significantly more expensive. */
10176 if (speed_p
10177 && GET_CODE (XEXP (x, 0)) == MULT
10178 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10179 *cost += extra_cost->mult[0].flag_setting;
10180
10181 if (speed_p
10182 && GET_CODE (XEXP (x, 0)) == PLUS
10183 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10184 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10185 0), 1), mode))
10186 *cost += extra_cost->mult[0].flag_setting;
10187 return true;
10188 }
10189
10190 shift_reg = NULL;
10191 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10192 if (shift_op != NULL)
10193 {
10194 if (shift_reg != NULL)
10195 {
10196 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10197 1, speed_p);
10198 if (speed_p)
10199 *cost += extra_cost->alu.arith_shift_reg;
10200 }
10201 else if (speed_p)
10202 *cost += extra_cost->alu.arith_shift;
10203 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10204 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10205 return true;
10206 }
10207
10208 if (speed_p)
10209 *cost += extra_cost->alu.arith;
10210 if (CONST_INT_P (XEXP (x, 1))
10211 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10212 {
10213 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10214 return true;
10215 }
10216 return false;
10217 }
10218
10219 /* Vector mode? */
10220
10221 *cost = LIBCALL_COST (2);
10222 return false;
10223 }
10224 return true;
10225
10226 case EQ:
10227 case NE:
10228 case LT:
10229 case LE:
10230 case GT:
10231 case GE:
10232 case LTU:
10233 case LEU:
10234 case GEU:
10235 case GTU:
10236 case ORDERED:
10237 case UNORDERED:
10238 case UNEQ:
10239 case UNLE:
10240 case UNLT:
10241 case UNGE:
10242 case UNGT:
10243 case LTGT:
10244 if (outer_code == SET)
10245 {
10246 /* Is it a store-flag operation? */
10247 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10248 && XEXP (x, 1) == const0_rtx)
10249 {
10250 /* Thumb also needs an IT insn. */
10251 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10252 return true;
10253 }
10254 if (XEXP (x, 1) == const0_rtx)
10255 {
10256 switch (code)
10257 {
10258 case LT:
10259 /* LSR Rd, Rn, #31. */
10260 if (speed_p)
10261 *cost += extra_cost->alu.shift;
10262 break;
10263
10264 case EQ:
10265 /* RSBS T1, Rn, #0
10266 ADC Rd, Rn, T1. */
10267
10268 case NE:
10269 /* SUBS T1, Rn, #1
10270 SBC Rd, Rn, T1. */
10271 *cost += COSTS_N_INSNS (1);
10272 break;
10273
10274 case LE:
10275 /* RSBS T1, Rn, Rn, LSR #31
10276 ADC Rd, Rn, T1. */
10277 *cost += COSTS_N_INSNS (1);
10278 if (speed_p)
10279 *cost += extra_cost->alu.arith_shift;
10280 break;
10281
10282 case GT:
10283 /* RSB Rd, Rn, Rn, ASR #1
10284 LSR Rd, Rd, #31. */
10285 *cost += COSTS_N_INSNS (1);
10286 if (speed_p)
10287 *cost += (extra_cost->alu.arith_shift
10288 + extra_cost->alu.shift);
10289 break;
10290
10291 case GE:
10292 /* ASR Rd, Rn, #31
10293 ADD Rd, Rn, #1. */
10294 *cost += COSTS_N_INSNS (1);
10295 if (speed_p)
10296 *cost += extra_cost->alu.shift;
10297 break;
10298
10299 default:
10300 /* Remaining cases are either meaningless or would take
10301 three insns anyway. */
10302 *cost = COSTS_N_INSNS (3);
10303 break;
10304 }
10305 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10306 return true;
10307 }
10308 else
10309 {
10310 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10311 if (CONST_INT_P (XEXP (x, 1))
10312 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10313 {
10314 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10315 return true;
10316 }
10317
10318 return false;
10319 }
10320 }
10321 /* Not directly inside a set. If it involves the condition code
10322 register it must be the condition for a branch, cond_exec or
10323 I_T_E operation. Since the comparison is performed elsewhere
10324 this is just the control part which has no additional
10325 cost. */
10326 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10327 && XEXP (x, 1) == const0_rtx)
10328 {
10329 *cost = 0;
10330 return true;
10331 }
10332 return false;
10333
10334 case ABS:
10335 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10336 && (mode == SFmode || !TARGET_VFP_SINGLE))
10337 {
10338 if (speed_p)
10339 *cost += extra_cost->fp[mode != SFmode].neg;
10340
10341 return false;
10342 }
10343 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10344 {
10345 *cost = LIBCALL_COST (1);
10346 return false;
10347 }
10348
10349 if (mode == SImode)
10350 {
10351 if (speed_p)
10352 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10353 return false;
10354 }
10355 /* Vector mode? */
10356 *cost = LIBCALL_COST (1);
10357 return false;
10358
10359 case SIGN_EXTEND:
10360 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10361 && MEM_P (XEXP (x, 0)))
10362 {
10363 if (mode == DImode)
10364 *cost += COSTS_N_INSNS (1);
10365
10366 if (!speed_p)
10367 return true;
10368
10369 if (GET_MODE (XEXP (x, 0)) == SImode)
10370 *cost += extra_cost->ldst.load;
10371 else
10372 *cost += extra_cost->ldst.load_sign_extend;
10373
10374 if (mode == DImode)
10375 *cost += extra_cost->alu.shift;
10376
10377 return true;
10378 }
10379
10380 /* Widening from less than 32 bits requires an extend operation. */
10381 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10382 {
10383 /* We have SXTB/SXTH. */
10384 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10385 if (speed_p)
10386 *cost += extra_cost->alu.extend;
10387 }
10388 else if (GET_MODE (XEXP (x, 0)) != SImode)
10389 {
10390 /* Needs two shifts. */
10391 *cost += COSTS_N_INSNS (1);
10392 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10393 if (speed_p)
10394 *cost += 2 * extra_cost->alu.shift;
10395 }
10396
10397 /* Widening beyond 32 bits requires one more insn. */
10398 if (mode == DImode)
10399 {
10400 *cost += COSTS_N_INSNS (1);
10401 if (speed_p)
10402 *cost += extra_cost->alu.shift;
10403 }
10404
10405 return true;
10406
10407 case ZERO_EXTEND:
10408 if ((arm_arch4
10409 || GET_MODE (XEXP (x, 0)) == SImode
10410 || GET_MODE (XEXP (x, 0)) == QImode)
10411 && MEM_P (XEXP (x, 0)))
10412 {
10413 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10414
10415 if (mode == DImode)
10416 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10417
10418 return true;
10419 }
10420
10421 /* Widening from less than 32 bits requires an extend operation. */
10422 if (GET_MODE (XEXP (x, 0)) == QImode)
10423 {
10424 /* UXTB can be a shorter instruction in Thumb2, but it might
10425 be slower than the AND Rd, Rn, #255 alternative. When
10426 optimizing for speed it should never be slower to use
10427 AND, and we don't really model 16-bit vs 32-bit insns
10428 here. */
10429 if (speed_p)
10430 *cost += extra_cost->alu.logical;
10431 }
10432 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10433 {
10434 /* We have UXTB/UXTH. */
10435 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10436 if (speed_p)
10437 *cost += extra_cost->alu.extend;
10438 }
10439 else if (GET_MODE (XEXP (x, 0)) != SImode)
10440 {
10441 /* Needs two shifts. It's marginally preferable to use
10442 shifts rather than two BIC instructions as the second
10443 shift may merge with a subsequent insn as a shifter
10444 op. */
10445 *cost = COSTS_N_INSNS (2);
10446 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10447 if (speed_p)
10448 *cost += 2 * extra_cost->alu.shift;
10449 }
10450
10451 /* Widening beyond 32 bits requires one more insn. */
10452 if (mode == DImode)
10453 {
10454 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10455 }
10456
10457 return true;
10458
10459 case CONST_INT:
10460 *cost = 0;
10461 /* CONST_INT has no mode, so we cannot tell for sure how many
10462 insns are really going to be needed. The best we can do is
10463 look at the value passed. If it fits in SImode, then assume
10464 that's the mode it will be used for. Otherwise assume it
10465 will be used in DImode. */
10466 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10467 mode = SImode;
10468 else
10469 mode = DImode;
10470
10471 /* Avoid blowing up in arm_gen_constant (). */
10472 if (!(outer_code == PLUS
10473 || outer_code == AND
10474 || outer_code == IOR
10475 || outer_code == XOR
10476 || outer_code == MINUS))
10477 outer_code = SET;
10478
10479 const_int_cost:
10480 if (mode == SImode)
10481 {
10482 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10483 INTVAL (x), NULL, NULL,
10484 0, 0));
10485 /* Extra costs? */
10486 }
10487 else
10488 {
10489 *cost += COSTS_N_INSNS (arm_gen_constant
10490 (outer_code, SImode, NULL,
10491 trunc_int_for_mode (INTVAL (x), SImode),
10492 NULL, NULL, 0, 0)
10493 + arm_gen_constant (outer_code, SImode, NULL,
10494 INTVAL (x) >> 32, NULL,
10495 NULL, 0, 0));
10496 /* Extra costs? */
10497 }
10498
10499 return true;
10500
10501 case CONST:
10502 case LABEL_REF:
10503 case SYMBOL_REF:
10504 if (speed_p)
10505 {
10506 if (arm_arch_thumb2 && !flag_pic)
10507 *cost += COSTS_N_INSNS (1);
10508 else
10509 *cost += extra_cost->ldst.load;
10510 }
10511 else
10512 *cost += COSTS_N_INSNS (1);
10513
10514 if (flag_pic)
10515 {
10516 *cost += COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.arith;
10519 }
10520
10521 return true;
10522
10523 case CONST_FIXED:
10524 *cost = COSTS_N_INSNS (4);
10525 /* Fixme. */
10526 return true;
10527
10528 case CONST_DOUBLE:
10529 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10530 && (mode == SFmode || !TARGET_VFP_SINGLE))
10531 {
10532 if (vfp3_const_double_rtx (x))
10533 {
10534 if (speed_p)
10535 *cost += extra_cost->fp[mode == DFmode].fpconst;
10536 return true;
10537 }
10538
10539 if (speed_p)
10540 {
10541 if (mode == DFmode)
10542 *cost += extra_cost->ldst.loadd;
10543 else
10544 *cost += extra_cost->ldst.loadf;
10545 }
10546 else
10547 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10548
10549 return true;
10550 }
10551 *cost = COSTS_N_INSNS (4);
10552 return true;
10553
10554 case CONST_VECTOR:
10555 /* Fixme. */
10556 if (TARGET_NEON
10557 && TARGET_HARD_FLOAT
10558 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10559 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10560 *cost = COSTS_N_INSNS (1);
10561 else
10562 *cost = COSTS_N_INSNS (4);
10563 return true;
10564
10565 case HIGH:
10566 case LO_SUM:
10567 /* When optimizing for size, we prefer constant pool entries to
10568 MOVW/MOVT pairs, so bump the cost of these slightly. */
10569 if (!speed_p)
10570 *cost += 1;
10571 return true;
10572
10573 case CLZ:
10574 if (speed_p)
10575 *cost += extra_cost->alu.clz;
10576 return false;
10577
10578 case SMIN:
10579 if (XEXP (x, 1) == const0_rtx)
10580 {
10581 if (speed_p)
10582 *cost += extra_cost->alu.log_shift;
10583 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10584 return true;
10585 }
10586 /* Fall through. */
10587 case SMAX:
10588 case UMIN:
10589 case UMAX:
10590 *cost += COSTS_N_INSNS (1);
10591 return false;
10592
10593 case TRUNCATE:
10594 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10595 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10596 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10597 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10598 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10599 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10600 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10601 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10602 == ZERO_EXTEND))))
10603 {
10604 if (speed_p)
10605 *cost += extra_cost->mult[1].extend;
10606 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10607 ZERO_EXTEND, 0, speed_p)
10608 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10609 ZERO_EXTEND, 0, speed_p));
10610 return true;
10611 }
10612 *cost = LIBCALL_COST (1);
10613 return false;
10614
10615 case UNSPEC_VOLATILE:
10616 case UNSPEC:
10617 return arm_unspec_cost (x, outer_code, speed_p, cost);
10618
10619 case PC:
10620 /* Reading the PC is like reading any other register. Writing it
10621 is more expensive, but we take that into account elsewhere. */
10622 *cost = 0;
10623 return true;
10624
10625 case ZERO_EXTRACT:
10626 /* TODO: Simple zero_extract of bottom bits using AND. */
10627 /* Fall through. */
10628 case SIGN_EXTRACT:
10629 if (arm_arch6
10630 && mode == SImode
10631 && CONST_INT_P (XEXP (x, 1))
10632 && CONST_INT_P (XEXP (x, 2)))
10633 {
10634 if (speed_p)
10635 *cost += extra_cost->alu.bfx;
10636 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10637 return true;
10638 }
10639 /* Without UBFX/SBFX, need to resort to shift operations. */
10640 *cost += COSTS_N_INSNS (1);
10641 if (speed_p)
10642 *cost += 2 * extra_cost->alu.shift;
10643 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10644 return true;
10645
10646 case FLOAT_EXTEND:
10647 if (TARGET_HARD_FLOAT)
10648 {
10649 if (speed_p)
10650 *cost += extra_cost->fp[mode == DFmode].widen;
10651 if (!TARGET_FPU_ARMV8
10652 && GET_MODE (XEXP (x, 0)) == HFmode)
10653 {
10654 /* Pre v8, widening HF->DF is a two-step process, first
10655 widening to SFmode. */
10656 *cost += COSTS_N_INSNS (1);
10657 if (speed_p)
10658 *cost += extra_cost->fp[0].widen;
10659 }
10660 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10661 return true;
10662 }
10663
10664 *cost = LIBCALL_COST (1);
10665 return false;
10666
10667 case FLOAT_TRUNCATE:
10668 if (TARGET_HARD_FLOAT)
10669 {
10670 if (speed_p)
10671 *cost += extra_cost->fp[mode == DFmode].narrow;
10672 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10673 return true;
10674 /* Vector modes? */
10675 }
10676 *cost = LIBCALL_COST (1);
10677 return false;
10678
10679 case FMA:
10680 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10681 {
10682 rtx op0 = XEXP (x, 0);
10683 rtx op1 = XEXP (x, 1);
10684 rtx op2 = XEXP (x, 2);
10685
10686
10687 /* vfms or vfnma. */
10688 if (GET_CODE (op0) == NEG)
10689 op0 = XEXP (op0, 0);
10690
10691 /* vfnms or vfnma. */
10692 if (GET_CODE (op2) == NEG)
10693 op2 = XEXP (op2, 0);
10694
10695 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10696 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10697 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10698
10699 if (speed_p)
10700 *cost += extra_cost->fp[mode ==DFmode].fma;
10701
10702 return true;
10703 }
10704
10705 *cost = LIBCALL_COST (3);
10706 return false;
10707
10708 case FIX:
10709 case UNSIGNED_FIX:
10710 if (TARGET_HARD_FLOAT)
10711 {
10712 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10713 a vcvt fixed-point conversion. */
10714 if (code == FIX && mode == SImode
10715 && GET_CODE (XEXP (x, 0)) == FIX
10716 && GET_MODE (XEXP (x, 0)) == SFmode
10717 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10718 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10719 > 0)
10720 {
10721 if (speed_p)
10722 *cost += extra_cost->fp[0].toint;
10723
10724 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10725 code, 0, speed_p);
10726 return true;
10727 }
10728
10729 if (GET_MODE_CLASS (mode) == MODE_INT)
10730 {
10731 mode = GET_MODE (XEXP (x, 0));
10732 if (speed_p)
10733 *cost += extra_cost->fp[mode == DFmode].toint;
10734 /* Strip off the 'cost' of rounding towards zero. */
10735 if (GET_CODE (XEXP (x, 0)) == FIX)
10736 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10737 0, speed_p);
10738 else
10739 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10740 /* ??? Increase the cost to deal with transferring from
10741 FP -> CORE registers? */
10742 return true;
10743 }
10744 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10745 && TARGET_FPU_ARMV8)
10746 {
10747 if (speed_p)
10748 *cost += extra_cost->fp[mode == DFmode].roundint;
10749 return false;
10750 }
10751 /* Vector costs? */
10752 }
10753 *cost = LIBCALL_COST (1);
10754 return false;
10755
10756 case FLOAT:
10757 case UNSIGNED_FLOAT:
10758 if (TARGET_HARD_FLOAT)
10759 {
10760 /* ??? Increase the cost to deal with transferring from CORE
10761 -> FP registers? */
10762 if (speed_p)
10763 *cost += extra_cost->fp[mode == DFmode].fromint;
10764 return false;
10765 }
10766 *cost = LIBCALL_COST (1);
10767 return false;
10768
10769 case CALL:
10770 return true;
10771
10772 case ASM_OPERANDS:
10773 {
10774 /* Just a guess: estimate the number of instructions in the asm
10775 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10776 though (see PR60663). */
10777 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10778 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10779
10780 *cost = COSTS_N_INSNS (asm_length + num_operands);
10781 return true;
10782 }
10783 default:
10784 if (mode != VOIDmode)
10785 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10786 else
10787 *cost = COSTS_N_INSNS (4); /* Who knows? */
10788 return false;
10789 }
10790 }
10791
10792 #undef HANDLE_NARROW_SHIFT_ARITH
10793
10794 /* RTX costs entry point. */
10795
10796 static bool
10797 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10798 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10799 {
10800 bool result;
10801 int code = GET_CODE (x);
10802 gcc_assert (current_tune->insn_extra_cost);
10803
10804 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10805 (enum rtx_code) outer_code,
10806 current_tune->insn_extra_cost,
10807 total, speed);
10808
10809 if (dump_file && (dump_flags & TDF_DETAILS))
10810 {
10811 print_rtl_single (dump_file, x);
10812 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10813 *total, result ? "final" : "partial");
10814 }
10815 return result;
10816 }
10817
10818 /* All address computations that can be done are free, but rtx cost returns
10819 the same for practically all of them.  So we weight the different types
10820 of address here in the order (most preferred first):
10821 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL.  */
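/* For illustration (derived from the checks below): a (post_inc r3) address
   costs 0, (plus r3 (const_int 8)) costs 2, (plus r3 (mult r2 (const_int 4)))
   costs 3, (plus r3 r2) costs 4, a bare register costs 6 and a
   (symbol_ref ...) address costs 10.  */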
10822 static inline int
10823 arm_arm_address_cost (rtx x)
10824 {
10825 enum rtx_code c = GET_CODE (x);
10826
10827 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10828 return 0;
10829 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10830 return 10;
10831
10832 if (c == PLUS)
10833 {
10834 if (CONST_INT_P (XEXP (x, 1)))
10835 return 2;
10836
10837 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10838 return 3;
10839
10840 return 4;
10841 }
10842
10843 return 6;
10844 }
10845
10846 static inline int
10847 arm_thumb_address_cost (rtx x)
10848 {
10849 enum rtx_code c = GET_CODE (x);
10850
10851 if (c == REG)
10852 return 1;
10853 if (c == PLUS
10854 && REG_P (XEXP (x, 0))
10855 && CONST_INT_P (XEXP (x, 1)))
10856 return 1;
10857
10858 return 2;
10859 }
10860
10861 static int
10862 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10863 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10864 {
10865 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10866 }
10867
10868 /* Adjust cost hook for XScale. */
10869 static bool
10870 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10871 int * cost)
10872 {
10873 /* Some true dependencies can have a higher cost depending
10874 on precisely how certain input operands are used. */
10875 if (dep_type == 0
10876 && recog_memoized (insn) >= 0
10877 && recog_memoized (dep) >= 0)
10878 {
10879 int shift_opnum = get_attr_shift (insn);
10880 enum attr_type attr_type = get_attr_type (dep);
10881
10882 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10883 operand for INSN. If we have a shifted input operand and the
10884 instruction we depend on is another ALU instruction, then we may
10885 have to account for an additional stall. */
10886 if (shift_opnum != 0
10887 && (attr_type == TYPE_ALU_SHIFT_IMM
10888 || attr_type == TYPE_ALUS_SHIFT_IMM
10889 || attr_type == TYPE_LOGIC_SHIFT_IMM
10890 || attr_type == TYPE_LOGICS_SHIFT_IMM
10891 || attr_type == TYPE_ALU_SHIFT_REG
10892 || attr_type == TYPE_ALUS_SHIFT_REG
10893 || attr_type == TYPE_LOGIC_SHIFT_REG
10894 || attr_type == TYPE_LOGICS_SHIFT_REG
10895 || attr_type == TYPE_MOV_SHIFT
10896 || attr_type == TYPE_MVN_SHIFT
10897 || attr_type == TYPE_MOV_SHIFT_REG
10898 || attr_type == TYPE_MVN_SHIFT_REG))
10899 {
10900 rtx shifted_operand;
10901 int opno;
10902
10903 /* Get the shifted operand. */
10904 extract_insn (insn);
10905 shifted_operand = recog_data.operand[shift_opnum];
10906
10907 /* Iterate over all the operands in DEP. If we write an operand
10908 that overlaps with SHIFTED_OPERAND, then we have to increase the
10909 cost of this dependency. */
10910 extract_insn (dep);
10911 preprocess_constraints (dep);
10912 for (opno = 0; opno < recog_data.n_operands; opno++)
10913 {
10914 /* We can ignore strict inputs. */
10915 if (recog_data.operand_type[opno] == OP_IN)
10916 continue;
10917
10918 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10919 shifted_operand))
10920 {
10921 *cost = 2;
10922 return false;
10923 }
10924 }
10925 }
10926 }
10927 return true;
10928 }
10929
10930 /* Adjust cost hook for Cortex A9. */
10931 static bool
10932 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10933 int * cost)
10934 {
10935 switch (dep_type)
10936 {
10937 case REG_DEP_ANTI:
10938 *cost = 0;
10939 return false;
10940
10941 case REG_DEP_TRUE:
10942 case REG_DEP_OUTPUT:
10943 if (recog_memoized (insn) >= 0
10944 && recog_memoized (dep) >= 0)
10945 {
10946 if (GET_CODE (PATTERN (insn)) == SET)
10947 {
10948 if (GET_MODE_CLASS
10949 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10950 || GET_MODE_CLASS
10951 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10952 {
10953 enum attr_type attr_type_insn = get_attr_type (insn);
10954 enum attr_type attr_type_dep = get_attr_type (dep);
10955
10956 /* By default all dependencies of the form
10957 s0 = s0 <op> s1
10958 s0 = s0 <op> s2
10959 have an extra latency of 1 cycle because
10960 of the input and output dependency in this
10961 case.  However, this gets modeled as a true
10962 dependency and hence all these checks. */
10963 if (REG_P (SET_DEST (PATTERN (insn)))
10964 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
10965 {
10966 /* FMACS is a special case where the dependent
10967 instruction can be issued 3 cycles before
10968 the normal latency in case of an output
10969 dependency. */
10970 if ((attr_type_insn == TYPE_FMACS
10971 || attr_type_insn == TYPE_FMACD)
10972 && (attr_type_dep == TYPE_FMACS
10973 || attr_type_dep == TYPE_FMACD))
10974 {
10975 if (dep_type == REG_DEP_OUTPUT)
10976 *cost = insn_default_latency (dep) - 3;
10977 else
10978 *cost = insn_default_latency (dep);
10979 return false;
10980 }
10981 else
10982 {
10983 if (dep_type == REG_DEP_OUTPUT)
10984 *cost = insn_default_latency (dep) + 1;
10985 else
10986 *cost = insn_default_latency (dep);
10987 }
10988 return false;
10989 }
10990 }
10991 }
10992 }
10993 break;
10994
10995 default:
10996 gcc_unreachable ();
10997 }
10998
10999 return true;
11000 }
11001
11002 /* Adjust cost hook for FA726TE. */
11003 static bool
11004 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11005 int * cost)
11006 {
11007 /* For FA726TE, a true dependency on the CPSR (i.e. a condition-setting insn
11008 followed by a predicated insn) has a penalty of 3. */
11009 if (dep_type == REG_DEP_TRUE
11010 && recog_memoized (insn) >= 0
11011 && recog_memoized (dep) >= 0
11012 && get_attr_conds (dep) == CONDS_SET)
11013 {
11014 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11015 if (get_attr_conds (insn) == CONDS_USE
11016 && get_attr_type (insn) != TYPE_BRANCH)
11017 {
11018 *cost = 3;
11019 return false;
11020 }
11021
11022 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11023 || get_attr_conds (insn) == CONDS_USE)
11024 {
11025 *cost = 0;
11026 return false;
11027 }
11028 }
11029
11030 return true;
11031 }
11032
11033 /* Implement TARGET_REGISTER_MOVE_COST.
11034
11035 A move between VFP_REGS and GENERAL_REGS is a single insn, but it is
11036 typically more expensive than a single memory access.  We set the cost
11037 to less than two memory accesses so that floating-point to integer
11038 conversion does not go through memory. */
11039
11040 int
11041 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11042 reg_class_t from, reg_class_t to)
11043 {
11044 if (TARGET_32BIT)
11045 {
11046 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11047 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11048 return 15;
11049 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11050 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11051 return 4;
11052 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11053 return 20;
11054 else
11055 return 2;
11056 }
11057 else
11058 {
11059 if (from == HI_REGS || to == HI_REGS)
11060 return 4;
11061 else
11062 return 2;
11063 }
11064 }
11065
11066 /* Implement TARGET_MEMORY_MOVE_COST. */
11067
11068 int
11069 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11070 bool in ATTRIBUTE_UNUSED)
11071 {
11072 if (TARGET_32BIT)
11073 return 10;
11074 else
11075 {
11076 if (GET_MODE_SIZE (mode) < 4)
11077 return 8;
11078 else
11079 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11080 }
11081 }
11082
11083 /* Vectorizer cost model implementation. */
11084
11085 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11086 static int
11087 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11088 tree vectype,
11089 int misalign ATTRIBUTE_UNUSED)
11090 {
11091 unsigned elements;
11092
11093 switch (type_of_cost)
11094 {
11095 case scalar_stmt:
11096 return current_tune->vec_costs->scalar_stmt_cost;
11097
11098 case scalar_load:
11099 return current_tune->vec_costs->scalar_load_cost;
11100
11101 case scalar_store:
11102 return current_tune->vec_costs->scalar_store_cost;
11103
11104 case vector_stmt:
11105 return current_tune->vec_costs->vec_stmt_cost;
11106
11107 case vector_load:
11108 return current_tune->vec_costs->vec_align_load_cost;
11109
11110 case vector_store:
11111 return current_tune->vec_costs->vec_store_cost;
11112
11113 case vec_to_scalar:
11114 return current_tune->vec_costs->vec_to_scalar_cost;
11115
11116 case scalar_to_vec:
11117 return current_tune->vec_costs->scalar_to_vec_cost;
11118
11119 case unaligned_load:
11120 return current_tune->vec_costs->vec_unalign_load_cost;
11121
11122 case unaligned_store:
11123 return current_tune->vec_costs->vec_unalign_store_cost;
11124
11125 case cond_branch_taken:
11126 return current_tune->vec_costs->cond_taken_branch_cost;
11127
11128 case cond_branch_not_taken:
11129 return current_tune->vec_costs->cond_not_taken_branch_cost;
11130
11131 case vec_perm:
11132 case vec_promote_demote:
11133 return current_tune->vec_costs->vec_stmt_cost;
11134
11135 case vec_construct:
11136 elements = TYPE_VECTOR_SUBPARTS (vectype);
11137 return elements / 2 + 1;
11138
11139 default:
11140 gcc_unreachable ();
11141 }
11142 }
11143
11144 /* Implement targetm.vectorize.add_stmt_cost. */
11145
11146 static unsigned
11147 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11148 struct _stmt_vec_info *stmt_info, int misalign,
11149 enum vect_cost_model_location where)
11150 {
11151 unsigned *cost = (unsigned *) data;
11152 unsigned retval = 0;
11153
11154 if (flag_vect_cost_model)
11155 {
11156 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11157 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11158
11159 /* Statements in an inner loop relative to the loop being
11160 vectorized are weighted more heavily. The value here is
11161 arbitrary and could potentially be improved with analysis. */
11162 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11163 count *= 50; /* FIXME. */
11164
11165 retval = (unsigned) (count * stmt_cost);
11166 cost[where] += retval;
11167 }
11168
11169 return retval;
11170 }
11171
11172 /* Return true if and only if this insn can dual-issue only as older. */
11173 static bool
11174 cortexa7_older_only (rtx_insn *insn)
11175 {
11176 if (recog_memoized (insn) < 0)
11177 return false;
11178
11179 switch (get_attr_type (insn))
11180 {
11181 case TYPE_ALU_DSP_REG:
11182 case TYPE_ALU_SREG:
11183 case TYPE_ALUS_SREG:
11184 case TYPE_LOGIC_REG:
11185 case TYPE_LOGICS_REG:
11186 case TYPE_ADC_REG:
11187 case TYPE_ADCS_REG:
11188 case TYPE_ADR:
11189 case TYPE_BFM:
11190 case TYPE_REV:
11191 case TYPE_MVN_REG:
11192 case TYPE_SHIFT_IMM:
11193 case TYPE_SHIFT_REG:
11194 case TYPE_LOAD_BYTE:
11195 case TYPE_LOAD1:
11196 case TYPE_STORE1:
11197 case TYPE_FFARITHS:
11198 case TYPE_FADDS:
11199 case TYPE_FFARITHD:
11200 case TYPE_FADDD:
11201 case TYPE_FMOV:
11202 case TYPE_F_CVT:
11203 case TYPE_FCMPS:
11204 case TYPE_FCMPD:
11205 case TYPE_FCONSTS:
11206 case TYPE_FCONSTD:
11207 case TYPE_FMULS:
11208 case TYPE_FMACS:
11209 case TYPE_FMULD:
11210 case TYPE_FMACD:
11211 case TYPE_FDIVS:
11212 case TYPE_FDIVD:
11213 case TYPE_F_MRC:
11214 case TYPE_F_MRRC:
11215 case TYPE_F_FLAG:
11216 case TYPE_F_LOADS:
11217 case TYPE_F_STORES:
11218 return true;
11219 default:
11220 return false;
11221 }
11222 }
11223
11224 /* Return true if and only if this insn can dual-issue as younger. */
11225 static bool
11226 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11227 {
11228 if (recog_memoized (insn) < 0)
11229 {
11230 if (verbose > 5)
11231 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11232 return false;
11233 }
11234
11235 switch (get_attr_type (insn))
11236 {
11237 case TYPE_ALU_IMM:
11238 case TYPE_ALUS_IMM:
11239 case TYPE_LOGIC_IMM:
11240 case TYPE_LOGICS_IMM:
11241 case TYPE_EXTEND:
11242 case TYPE_MVN_IMM:
11243 case TYPE_MOV_IMM:
11244 case TYPE_MOV_REG:
11245 case TYPE_MOV_SHIFT:
11246 case TYPE_MOV_SHIFT_REG:
11247 case TYPE_BRANCH:
11248 case TYPE_CALL:
11249 return true;
11250 default:
11251 return false;
11252 }
11253 }
11254
11255
11256 /* Look for an instruction that can dual issue only as an older
11257 instruction, and move it in front of any instructions that can
11258 dual-issue as younger, while preserving the relative order of all
11259 other instructions in the ready list.  This is a heuristic to help
11260 dual-issue in later cycles, by postponing issue of more flexible
11261 instructions. This heuristic may affect dual issue opportunities
11262 in the current cycle. */
11263 static void
11264 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11265 int *n_readyp, int clock)
11266 {
11267 int i;
11268 int first_older_only = -1, first_younger = -1;
11269
11270 if (verbose > 5)
11271 fprintf (file,
11272 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11273 clock,
11274 *n_readyp);
11275
11276 /* Traverse the ready list from the head (the instruction to issue
11277 first), looking for the first instruction that can issue as
11278 younger and the first instruction that can dual-issue only as
11279 older. */
11280 for (i = *n_readyp - 1; i >= 0; i--)
11281 {
11282 rtx_insn *insn = ready[i];
11283 if (cortexa7_older_only (insn))
11284 {
11285 first_older_only = i;
11286 if (verbose > 5)
11287 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11288 break;
11289 }
11290 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11291 first_younger = i;
11292 }
11293
11294 /* Nothing to reorder because either no younger insn was found or the insn
11295 that can dual-issue only as older appears before any insn that
11296 can dual-issue as younger. */
11297 if (first_younger == -1)
11298 {
11299 if (verbose > 5)
11300 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11301 return;
11302 }
11303
11304 /* Nothing to reorder because no older-only insn in the ready list. */
11305 if (first_older_only == -1)
11306 {
11307 if (verbose > 5)
11308 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11309 return;
11310 }
11311
11312 /* Move first_older_only insn before first_younger. */
11313 if (verbose > 5)
11314 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11315 INSN_UID (ready[first_older_only]),
11316 INSN_UID (ready[first_younger]));
11317 rtx_insn *first_older_only_insn = ready[first_older_only];
11318 for (i = first_older_only; i < first_younger; i++)
11319 {
11320 ready[i] = ready[i+1];
11321 }
11322
11323 ready[i] = first_older_only_insn;
11324 return;
11325 }
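/* Worked example for the reordering above (illustration only): if the ready
   list, indexed from tail (0) to head (*n_readyp - 1), is { A, C, B, D },
   where C can dual-issue only as older, B can dual-issue as younger, and A and
   D are neither, then the scan finds first_younger == 2 (B) and
   first_older_only == 1 (C).  The rotation yields { A, B, C, D }, so C is now
   issued before B, keeping the more flexible B available to pair with a later
   older-only instruction.  */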
11326
11327 /* Implement TARGET_SCHED_REORDER. */
11328 static int
11329 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11330 int clock)
11331 {
11332 switch (arm_tune)
11333 {
11334 case TARGET_CPU_cortexa7:
11335 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11336 break;
11337 default:
11338 /* Do nothing for other cores. */
11339 break;
11340 }
11341
11342 return arm_issue_rate ();
11343 }
11344
11345 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11346 It corrects the value of COST based on the relationship between
11347 INSN and DEP and the type of the dependence.  It returns the new
11348 value. There is a per-core adjust_cost hook to adjust scheduler costs
11349 and the per-core hook can choose to completely override the generic
11350 adjust_cost function. Only put bits of code into arm_adjust_cost that
11351 are common across all cores. */
11352 static int
11353 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11354 unsigned int)
11355 {
11356 rtx i_pat, d_pat;
11357
11358 /* When generating Thumb-1 code, we want to place flag-setting operations
11359 close to a conditional branch which depends on them, so that we can
11360 omit the comparison. */
11361 if (TARGET_THUMB1
11362 && dep_type == 0
11363 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11364 && recog_memoized (dep) >= 0
11365 && get_attr_conds (dep) == CONDS_SET)
11366 return 0;
11367
11368 if (current_tune->sched_adjust_cost != NULL)
11369 {
11370 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11371 return cost;
11372 }
11373
11374 /* XXX Is this strictly true? */
11375 if (dep_type == REG_DEP_ANTI
11376 || dep_type == REG_DEP_OUTPUT)
11377 return 0;
11378
11379 /* Call insns don't incur a stall, even if they follow a load. */
11380 if (dep_type == 0
11381 && CALL_P (insn))
11382 return 1;
11383
11384 if ((i_pat = single_set (insn)) != NULL
11385 && MEM_P (SET_SRC (i_pat))
11386 && (d_pat = single_set (dep)) != NULL
11387 && MEM_P (SET_DEST (d_pat)))
11388 {
11389 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11390 /* This is a load after a store; there is no conflict if the load reads
11391 from a cached area.  Assume that loads from the stack and from the
11392 constant pool are cached, and that others will miss.  This is a
11393 hack. */
11394
11395 if ((GET_CODE (src_mem) == SYMBOL_REF
11396 && CONSTANT_POOL_ADDRESS_P (src_mem))
11397 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11398 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11399 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11400 return 1;
11401 }
11402
11403 return cost;
11404 }
11405
11406 int
11407 arm_max_conditional_execute (void)
11408 {
11409 return max_insns_skipped;
11410 }
11411
11412 static int
11413 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11414 {
11415 if (TARGET_32BIT)
11416 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11417 else
11418 return (optimize > 0) ? 2 : 0;
11419 }
11420
11421 static int
11422 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11423 {
11424 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11425 }
11426
11427 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11428 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11429 sequences of non-executed instructions in IT blocks probably take the same
11430 amount of time as executed instructions (and the IT instruction itself takes
11431 space in icache). This function was experimentally determined to give good
11432 results on a popular embedded benchmark. */
11433
11434 static int
11435 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11436 {
11437 return (TARGET_32BIT && speed_p) ? 1
11438 : arm_default_branch_cost (speed_p, predictable_p);
11439 }
11440
11441 static int
11442 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11443 {
11444 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11445 }
11446
11447 static bool fp_consts_inited = false;
11448
11449 static REAL_VALUE_TYPE value_fp0;
11450
11451 static void
11452 init_fp_table (void)
11453 {
11454 REAL_VALUE_TYPE r;
11455
11456 r = REAL_VALUE_ATOF ("0", DFmode);
11457 value_fp0 = r;
11458 fp_consts_inited = true;
11459 }
11460
11461 /* Return TRUE if rtx X is a valid immediate FP constant. */
11462 int
11463 arm_const_double_rtx (rtx x)
11464 {
11465 const REAL_VALUE_TYPE *r;
11466
11467 if (!fp_consts_inited)
11468 init_fp_table ();
11469
11470 r = CONST_DOUBLE_REAL_VALUE (x);
11471 if (REAL_VALUE_MINUS_ZERO (*r))
11472 return 0;
11473
11474 if (real_equal (r, &value_fp0))
11475 return 1;
11476
11477 return 0;
11478 }
11479
11480 /* VFPv3 has a fairly wide range of representable immediates, formed from
11481 "quarter-precision" floating-point values. These can be evaluated using this
11482 formula (with ^ for exponentiation):
11483
11484 -1^s * n * 2^-r
11485
11486 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11487 16 <= n <= 31 and 0 <= r <= 7.
11488
11489 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11490
11491 - A (most-significant) is the sign bit.
11492 - BCD are the exponent (encoded as r XOR 3).
11493 - EFGH are the mantissa (encoded as n - 16).
11494 */
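/* Worked example: 1.0 = 16 * 2^-4, so (s, n, r) = (0, 16, 4) and the encoded
   index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70; likewise
   0.5 = 16 * 2^-5 encodes as 0x60.  The sketch below (illustration only --
   the name quarter_precision_index is hypothetical and the function is not
   part of this file) shows the mapping, assuming S, N and R already satisfy
   0 <= S <= 1, 16 <= N <= 31 and 0 <= R <= 7.  */

#if 0
static unsigned char
quarter_precision_index (int s, int n, int r)
{
  /* A = sign bit, BCD = r XOR 3, EFGH = n - 16, as described above.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif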
11495
11496 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11497 fconst[sd] instruction, or -1 if X isn't suitable. */
11498 static int
11499 vfp3_const_double_index (rtx x)
11500 {
11501 REAL_VALUE_TYPE r, m;
11502 int sign, exponent;
11503 unsigned HOST_WIDE_INT mantissa, mant_hi;
11504 unsigned HOST_WIDE_INT mask;
11505 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11506 bool fail;
11507
11508 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11509 return -1;
11510
11511 r = *CONST_DOUBLE_REAL_VALUE (x);
11512
11513 /* We can't represent these things, so detect them first. */
11514 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11515 return -1;
11516
11517 /* Extract sign, exponent and mantissa. */
11518 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11519 r = real_value_abs (&r);
11520 exponent = REAL_EXP (&r);
11521 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11522 highest (sign) bit, with a fixed binary point at bit point_pos.
11523 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11524 bits for the mantissa, this may fail (low bits would be lost). */
11525 real_ldexp (&m, &r, point_pos - exponent);
11526 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11527 mantissa = w.elt (0);
11528 mant_hi = w.elt (1);
11529
11530 /* If there are bits set in the low part of the mantissa, we can't
11531 represent this value. */
11532 if (mantissa != 0)
11533 return -1;
11534
11535 /* Now make it so that mantissa contains the most-significant bits, and move
11536 the point_pos to indicate that the least-significant bits have been
11537 discarded. */
11538 point_pos -= HOST_BITS_PER_WIDE_INT;
11539 mantissa = mant_hi;
11540
11541 /* We can permit four significant bits of mantissa only, plus a high bit
11542 which is always 1. */
11543 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11544 if ((mantissa & mask) != 0)
11545 return -1;
11546
11547 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11548 mantissa >>= point_pos - 5;
11549
11550 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11551 floating-point immediate zero with Neon using an integer-zero load, but
11552 that case is handled elsewhere.) */
11553 if (mantissa == 0)
11554 return -1;
11555
11556 gcc_assert (mantissa >= 16 && mantissa <= 31);
11557
11558 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11559 normalized significands are in the range [1, 2). (Our mantissa is shifted
11560 left 4 places at this point relative to normalized IEEE754 values). GCC
11561 internally uses [0.5, 1) (see real.c), so the exponent returned from
11562 REAL_EXP must be altered. */
11563 exponent = 5 - exponent;
11564
11565 if (exponent < 0 || exponent > 7)
11566 return -1;
11567
11568 /* Sign, mantissa and exponent are now in the correct form to plug into the
11569 formula described in the comment above. */
11570 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11571 }
11572
11573 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11574 int
11575 vfp3_const_double_rtx (rtx x)
11576 {
11577 if (!TARGET_VFP3)
11578 return 0;
11579
11580 return vfp3_const_double_index (x) != -1;
11581 }
11582
11583 /* Recognize immediates which can be used in various Neon instructions. Legal
11584 immediates are described by the following table (for VMVN variants, the
11585 bitwise inverse of the constant shown is recognized. In either case, VMOV
11586 is output and the correct instruction to use for a given constant is chosen
11587 by the assembler). The constant shown is replicated across all elements of
11588 the destination vector.
11589
11590 insn elems variant constant (binary)
11591 ---- ----- ------- -----------------
11592 vmov i32 0 00000000 00000000 00000000 abcdefgh
11593 vmov i32 1 00000000 00000000 abcdefgh 00000000
11594 vmov i32 2 00000000 abcdefgh 00000000 00000000
11595 vmov i32 3 abcdefgh 00000000 00000000 00000000
11596 vmov i16 4 00000000 abcdefgh
11597 vmov i16 5 abcdefgh 00000000
11598 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11599 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11600 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11601 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11602 vmvn i16 10 00000000 abcdefgh
11603 vmvn i16 11 abcdefgh 00000000
11604 vmov i32 12 00000000 00000000 abcdefgh 11111111
11605 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11606 vmov i32 14 00000000 abcdefgh 11111111 11111111
11607 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11608 vmov i8 16 abcdefgh
11609 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11610 eeeeeeee ffffffff gggggggg hhhhhhhh
11611 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11612 vmov f32 19 00000000 00000000 00000000 00000000
11613
11614 For case 18, B = !b. Representable values are exactly those accepted by
11615 vfp3_const_double_index, but are output as floating-point numbers rather
11616 than indices.
11617
11618 For case 19, we will change it to vmov.i32 when assembling.
11619
11620 Variants 0-5 (inclusive) may also be used as immediates for the second
11621 operand of VORR/VBIC instructions.
11622
11623 The INVERSE argument causes the bitwise inverse of the given operand to be
11624 recognized instead (used for recognizing legal immediates for the VAND/VORN
11625 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11626 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11627 output, rather than the real insns vbic/vorr).
11628
11629 INVERSE makes no difference to the recognition of float vectors.
11630
11631 The return value is the variant of immediate as shown in the above table, or
11632 -1 if the given value doesn't match any of the listed patterns.
11633 */
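/* Worked example (derived from the table above): a V4SImode constant whose
   elements all equal 0x0000ab00 splits into the byte pattern
   { 00, ab, 00, 00 } repeated, which matches variant 1 with an element width
   of 32; a V8QImode constant whose bytes all equal 0x2a matches variant 16
   with an element width of 8.  */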
11634 static int
11635 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11636 rtx *modconst, int *elementwidth)
11637 {
11638 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11639 matches = 1; \
11640 for (i = 0; i < idx; i += (STRIDE)) \
11641 if (!(TEST)) \
11642 matches = 0; \
11643 if (matches) \
11644 { \
11645 immtype = (CLASS); \
11646 elsize = (ELSIZE); \
11647 break; \
11648 }
11649
11650 unsigned int i, elsize = 0, idx = 0, n_elts;
11651 unsigned int innersize;
11652 unsigned char bytes[16];
11653 int immtype = -1, matches;
11654 unsigned int invmask = inverse ? 0xff : 0;
11655 bool vector = GET_CODE (op) == CONST_VECTOR;
11656
11657 if (vector)
11658 n_elts = CONST_VECTOR_NUNITS (op);
11659 else
11660 {
11661 n_elts = 1;
11662 if (mode == VOIDmode)
11663 mode = DImode;
11664 }
11665
11666 innersize = GET_MODE_UNIT_SIZE (mode);
11667
11668 /* Vectors of float constants. */
11669 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11670 {
11671 rtx el0 = CONST_VECTOR_ELT (op, 0);
11672
11673 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11674 return -1;
11675
11676 /* FP16 vectors cannot be represented. */
11677 if (GET_MODE_INNER (mode) == HFmode)
11678 return -1;
11679
11680 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11681 are distinct in this context. */
11682 if (!const_vec_duplicate_p (op))
11683 return -1;
11684
11685 if (modconst)
11686 *modconst = CONST_VECTOR_ELT (op, 0);
11687
11688 if (elementwidth)
11689 *elementwidth = 0;
11690
11691 if (el0 == CONST0_RTX (GET_MODE (el0)))
11692 return 19;
11693 else
11694 return 18;
11695 }
11696
11697 /* Splat vector constant out into a byte vector. */
11698 for (i = 0; i < n_elts; i++)
11699 {
11700 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11701 unsigned HOST_WIDE_INT elpart;
11702
11703 gcc_assert (CONST_INT_P (el));
11704 elpart = INTVAL (el);
11705
11706 for (unsigned int byte = 0; byte < innersize; byte++)
11707 {
11708 bytes[idx++] = (elpart & 0xff) ^ invmask;
11709 elpart >>= BITS_PER_UNIT;
11710 }
11711 }
11712
11713 /* Sanity check. */
11714 gcc_assert (idx == GET_MODE_SIZE (mode));
11715
11716 do
11717 {
11718 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11719 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11720
11721 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11722 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11723
11724 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11725 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11726
11727 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11728 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11729
11730 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11731
11732 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11733
11734 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11735 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11736
11737 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11738 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11739
11740 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11741 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11742
11743 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11744 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11745
11746 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11747
11748 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11749
11750 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11751 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11752
11753 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11754 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11755
11756 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11757 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11758
11759 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11760 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11761
11762 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11763
11764 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11765 && bytes[i] == bytes[(i + 8) % idx]);
11766 }
11767 while (0);
11768
11769 if (immtype == -1)
11770 return -1;
11771
11772 if (elementwidth)
11773 *elementwidth = elsize;
11774
11775 if (modconst)
11776 {
11777 unsigned HOST_WIDE_INT imm = 0;
11778
11779 /* Un-invert bytes of recognized vector, if necessary. */
11780 if (invmask != 0)
11781 for (i = 0; i < idx; i++)
11782 bytes[i] ^= invmask;
11783
11784 if (immtype == 17)
11785 {
11786 /* FIXME: Broken on 32-bit H_W_I hosts. */
11787 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11788
11789 for (i = 0; i < 8; i++)
11790 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11791 << (i * BITS_PER_UNIT);
11792
11793 *modconst = GEN_INT (imm);
11794 }
11795 else
11796 {
11797 unsigned HOST_WIDE_INT imm = 0;
11798
11799 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11800 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11801
11802 *modconst = GEN_INT (imm);
11803 }
11804 }
11805
11806 return immtype;
11807 #undef CHECK
11808 }
11809
11810 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11811 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11812 float elements), and a modified constant (whatever should be output for a
11813 VMOV) in *MODCONST. */
11814
11815 int
11816 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11817 rtx *modconst, int *elementwidth)
11818 {
11819 rtx tmpconst;
11820 int tmpwidth;
11821 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11822
11823 if (retval == -1)
11824 return 0;
11825
11826 if (modconst)
11827 *modconst = tmpconst;
11828
11829 if (elementwidth)
11830 *elementwidth = tmpwidth;
11831
11832 return 1;
11833 }
11834
11835 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11836 the immediate is valid, write a constant suitable for using as an operand
11837 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11838 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11839
11840 int
11841 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11842 rtx *modconst, int *elementwidth)
11843 {
11844 rtx tmpconst;
11845 int tmpwidth;
11846 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11847
11848 if (retval < 0 || retval > 5)
11849 return 0;
11850
11851 if (modconst)
11852 *modconst = tmpconst;
11853
11854 if (elementwidth)
11855 *elementwidth = tmpwidth;
11856
11857 return 1;
11858 }
11859
11860 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11861 the immediate is valid, write a constant suitable for using as an operand
11862 to VSHR/VSHL to *MODCONST and the corresponding element width to
11863 *ELEMENTWIDTH.  ISLEFTSHIFT says whether this is a left or a right shift,
11864 because the two have different immediate ranges. */
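/* For example (following the checks below), with 8-bit elements a left-shift
   immediate must lie in the range 0-7, while a right-shift immediate must lie
   in the range 1-8.  */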
11865
11866 int
11867 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11868 rtx *modconst, int *elementwidth,
11869 bool isleftshift)
11870 {
11871 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11872 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11873 unsigned HOST_WIDE_INT last_elt = 0;
11874 unsigned HOST_WIDE_INT maxshift;
11875
11876 /* All elements of the vector constant must be the same integer; record its value. */
11877 for (i = 0; i < n_elts; i++)
11878 {
11879 rtx el = CONST_VECTOR_ELT (op, i);
11880 unsigned HOST_WIDE_INT elpart;
11881
11882 if (CONST_INT_P (el))
11883 elpart = INTVAL (el);
11884 else if (CONST_DOUBLE_P (el))
11885 return 0;
11886 else
11887 gcc_unreachable ();
11888
11889 if (i != 0 && elpart != last_elt)
11890 return 0;
11891
11892 last_elt = elpart;
11893 }
11894
11895 /* Shift less than element size. */
11896 maxshift = innersize * 8;
11897
11898 if (isleftshift)
11899 {
11900 /* Left shift immediate value can be from 0 to <size>-1. */
11901 if (last_elt >= maxshift)
11902 return 0;
11903 }
11904 else
11905 {
11906 /* Right shift immediate value can be from 1 to <size>. */
11907 if (last_elt == 0 || last_elt > maxshift)
11908 return 0;
11909 }
11910
11911 if (elementwidth)
11912 *elementwidth = innersize * 8;
11913
11914 if (modconst)
11915 *modconst = CONST_VECTOR_ELT (op, 0);
11916
11917 return 1;
11918 }
11919
11920 /* Return a string suitable for output of Neon immediate logic operation
11921 MNEM. */
11922
11923 char *
11924 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11925 int inverse, int quad)
11926 {
11927 int width, is_valid;
11928 static char templ[40];
11929
11930 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11931
11932 gcc_assert (is_valid != 0);
11933
11934 if (quad)
11935 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11936 else
11937 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11938
11939 return templ;
11940 }
11941
11942 /* Return a string suitable for output of Neon immediate shift operation
11943 (VSHR or VSHL) MNEM. */
11944
11945 char *
11946 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11947 machine_mode mode, int quad,
11948 bool isleftshift)
11949 {
11950 int width, is_valid;
11951 static char templ[40];
11952
11953 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11954 gcc_assert (is_valid != 0);
11955
11956 if (quad)
11957 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11958 else
11959 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11960
11961 return templ;
11962 }
11963
11964 /* Output a sequence of pairwise operations to implement a reduction.
11965 NOTE: We do "too much work" here, because pairwise operations work on two
11966 registers' worth of operands in one go.  Unfortunately we don't think we can
11967 exploit those extra calculations to do the full operation in fewer steps.
11968 Although all vector elements of the result but the first are ignored, we
11969 actually calculate the same result in each of the elements. An alternative
11970 such as initially loading a vector with zero to use as each of the second
11971 operands would use up an additional register and take an extra instruction,
11972 for no particular gain. */
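/* Worked example: to sum a 4-element vector { a, b, c, d }, the loop below
   emits two pairwise additions; the first produces { a+b, c+d, a+b, c+d } and
   the second leaves the total a+b+c+d in every element, with the final step
   writing to OP0.  */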
11973
11974 void
11975 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
11976 rtx (*reduc) (rtx, rtx, rtx))
11977 {
11978 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
11979 rtx tmpsum = op1;
11980
11981 for (i = parts / 2; i >= 1; i /= 2)
11982 {
11983 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11984 emit_insn (reduc (dest, tmpsum, tmpsum));
11985 tmpsum = dest;
11986 }
11987 }
11988
11989 /* If VALS is a vector constant that can be loaded into a register
11990 using VDUP, generate instructions to do so and return an RTX to
11991 assign to the register. Otherwise return NULL_RTX. */
11992
11993 static rtx
11994 neon_vdup_constant (rtx vals)
11995 {
11996 machine_mode mode = GET_MODE (vals);
11997 machine_mode inner_mode = GET_MODE_INNER (mode);
11998 rtx x;
11999
12000 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12001 return NULL_RTX;
12002
12003 if (!const_vec_duplicate_p (vals, &x))
12004 /* The elements are not all the same. We could handle repeating
12005 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12006 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12007 vdup.i16). */
12008 return NULL_RTX;
12009
12010 /* We can load this constant by using VDUP and a constant in a
12011 single ARM register. This will be cheaper than a vector
12012 load. */
12013
12014 x = copy_to_mode_reg (inner_mode, x);
12015 return gen_rtx_VEC_DUPLICATE (mode, x);
12016 }
12017
12018 /* Generate code to load VALS, which is a PARALLEL containing only
12019 constants (for vec_init) or CONST_VECTOR, efficiently into a
12020 register. Returns an RTX to copy into the register, or NULL_RTX
12021 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12022
12023 rtx
12024 neon_make_constant (rtx vals)
12025 {
12026 machine_mode mode = GET_MODE (vals);
12027 rtx target;
12028 rtx const_vec = NULL_RTX;
12029 int n_elts = GET_MODE_NUNITS (mode);
12030 int n_const = 0;
12031 int i;
12032
12033 if (GET_CODE (vals) == CONST_VECTOR)
12034 const_vec = vals;
12035 else if (GET_CODE (vals) == PARALLEL)
12036 {
12037 /* A CONST_VECTOR must contain only CONST_INTs and
12038 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12039 Only store valid constants in a CONST_VECTOR. */
12040 for (i = 0; i < n_elts; ++i)
12041 {
12042 rtx x = XVECEXP (vals, 0, i);
12043 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12044 n_const++;
12045 }
12046 if (n_const == n_elts)
12047 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12048 }
12049 else
12050 gcc_unreachable ();
12051
12052 if (const_vec != NULL
12053 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12054 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12055 return const_vec;
12056 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12057 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12058 pipeline cycle; creating the constant takes one or two ARM
12059 pipeline cycles. */
12060 return target;
12061 else if (const_vec != NULL_RTX)
12062 /* Load from constant pool. On Cortex-A8 this takes two cycles
12063 (for either double or quad vectors). We can not take advantage
12064 of single-cycle VLD1 because we need a PC-relative addressing
12065 mode. */
12066 return const_vec;
12067 else
12068 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12069 We can not construct an initializer. */
12070 return NULL_RTX;
12071 }
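/* For illustration of the three paths above: a V4SImode constant of all zeros
   is returned unchanged and loaded with a single VMOV immediate; a V4SImode
   constant replicating 0x12345678 is not a valid VMOV immediate but can be
   loaded with VDUP from a core register; and a PARALLEL containing a
   SYMBOL_REF element yields NULL_RTX, since it cannot be converted into a
   CONST_VECTOR.  */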
12072
12073 /* Initialize vector TARGET to VALS. */
12074
12075 void
12076 neon_expand_vector_init (rtx target, rtx vals)
12077 {
12078 machine_mode mode = GET_MODE (target);
12079 machine_mode inner_mode = GET_MODE_INNER (mode);
12080 int n_elts = GET_MODE_NUNITS (mode);
12081 int n_var = 0, one_var = -1;
12082 bool all_same = true;
12083 rtx x, mem;
12084 int i;
12085
12086 for (i = 0; i < n_elts; ++i)
12087 {
12088 x = XVECEXP (vals, 0, i);
12089 if (!CONSTANT_P (x))
12090 ++n_var, one_var = i;
12091
12092 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12093 all_same = false;
12094 }
12095
12096 if (n_var == 0)
12097 {
12098 rtx constant = neon_make_constant (vals);
12099 if (constant != NULL_RTX)
12100 {
12101 emit_move_insn (target, constant);
12102 return;
12103 }
12104 }
12105
12106 /* Splat a single non-constant element if we can. */
12107 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12108 {
12109 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12110 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12111 return;
12112 }
12113
12114 /* One field is non-constant. Load constant then overwrite varying
12115 field. This is more efficient than using the stack. */
12116 if (n_var == 1)
12117 {
12118 rtx copy = copy_rtx (vals);
12119 rtx index = GEN_INT (one_var);
12120
12121 /* Load constant part of vector, substitute neighboring value for
12122 varying element. */
12123 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12124 neon_expand_vector_init (target, copy);
12125
12126 /* Insert variable. */
12127 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12128 switch (mode)
12129 {
12130 case V8QImode:
12131 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12132 break;
12133 case V16QImode:
12134 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12135 break;
12136 case V4HImode:
12137 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12138 break;
12139 case V8HImode:
12140 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12141 break;
12142 case V2SImode:
12143 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12144 break;
12145 case V4SImode:
12146 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12147 break;
12148 case V2SFmode:
12149 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12150 break;
12151 case V4SFmode:
12152 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12153 break;
12154 case V2DImode:
12155 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12156 break;
12157 default:
12158 gcc_unreachable ();
12159 }
12160 return;
12161 }
12162
12163 /* Construct the vector in memory one field at a time
12164 and load the whole vector. */
12165 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12166 for (i = 0; i < n_elts; i++)
12167 emit_move_insn (adjust_address_nv (mem, inner_mode,
12168 i * GET_MODE_SIZE (inner_mode)),
12169 XVECEXP (vals, 0, i));
12170 emit_move_insn (target, mem);
12171 }
12172
12173 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Issue
12174 an error described by DESC if it doesn't.  EXP indicates the source
12175 location, which includes the inlining history for intrinsics. */
12176
12177 static void
12178 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12179 const_tree exp, const char *desc)
12180 {
12181 HOST_WIDE_INT lane;
12182
12183 gcc_assert (CONST_INT_P (operand));
12184
12185 lane = INTVAL (operand);
12186
12187 if (lane < low || lane >= high)
12188 {
12189 if (exp)
12190 error ("%K%s %wd out of range %wd - %wd",
12191 exp, desc, lane, low, high - 1);
12192 else
12193 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12194 }
12195 }
12196
12197 /* Bounds-check lanes. */
12198
12199 void
12200 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12201 const_tree exp)
12202 {
12203 bounds_check (operand, low, high, exp, "lane");
12204 }
12205
12206 /* Bounds-check constants. */
12207
12208 void
12209 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12210 {
12211 bounds_check (operand, low, high, NULL_TREE, "constant");
12212 }
12213
12214 HOST_WIDE_INT
12215 neon_element_bits (machine_mode mode)
12216 {
12217 return GET_MODE_UNIT_BITSIZE (mode);
12218 }
12219
12220 \f
12221 /* Predicates for `match_operand' and `match_operator'. */
12222
12223 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12224 WB is true if full writeback address modes are allowed and is false
12225 if limited writeback address modes (POST_INC and PRE_DEC) are
12226 allowed. */
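/* For example (summarizing the checks below), the following addresses are
   accepted: (mem (reg rN)); (mem (post_inc rN)); (mem (pre_dec rN));
   (mem (plus rN (const_int OFF))) with -1024 < OFF < 1024 and OFF a multiple
   of 4; and, when WB is true, (mem (pre_inc rN)), (mem (post_dec rN)) and
   POST_MODIFY/PRE_MODIFY forms whose offsets satisfy the same constraints.  */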
12227
12228 int
12229 arm_coproc_mem_operand (rtx op, bool wb)
12230 {
12231 rtx ind;
12232
12233 /* Reject eliminable registers. */
12234 if (! (reload_in_progress || reload_completed || lra_in_progress)
12235 && ( reg_mentioned_p (frame_pointer_rtx, op)
12236 || reg_mentioned_p (arg_pointer_rtx, op)
12237 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12238 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12239 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12240 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12241 return FALSE;
12242
12243 /* Constants are converted into offsets from labels. */
12244 if (!MEM_P (op))
12245 return FALSE;
12246
12247 ind = XEXP (op, 0);
12248
12249 if (reload_completed
12250 && (GET_CODE (ind) == LABEL_REF
12251 || (GET_CODE (ind) == CONST
12252 && GET_CODE (XEXP (ind, 0)) == PLUS
12253 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12254 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12255 return TRUE;
12256
12257 /* Match: (mem (reg)). */
12258 if (REG_P (ind))
12259 return arm_address_register_rtx_p (ind, 0);
12260
12261 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12262 acceptable in any case (subject to verification by
12263 arm_address_register_rtx_p). We need WB to be true to accept
12264 PRE_INC and POST_DEC. */
12265 if (GET_CODE (ind) == POST_INC
12266 || GET_CODE (ind) == PRE_DEC
12267 || (wb
12268 && (GET_CODE (ind) == PRE_INC
12269 || GET_CODE (ind) == POST_DEC)))
12270 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12271
12272 if (wb
12273 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12274 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12275 && GET_CODE (XEXP (ind, 1)) == PLUS
12276 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12277 ind = XEXP (ind, 1);
12278
12279 /* Match:
12280 (plus (reg)
12281 (const)). */
12282 if (GET_CODE (ind) == PLUS
12283 && REG_P (XEXP (ind, 0))
12284 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12285 && CONST_INT_P (XEXP (ind, 1))
12286 && INTVAL (XEXP (ind, 1)) > -1024
12287 && INTVAL (XEXP (ind, 1)) < 1024
12288 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12289 return TRUE;
12290
12291 return FALSE;
12292 }
12293
12294 /* Return TRUE if OP is a memory operand which we can load or store a vector
12295 to/from. TYPE is one of the following values:
12296 0 - Vector load/store (vldr)
12297 1 - Core registers (ldm)
12298 2 - Element/structure loads (vld1)
12299 */
12300 int
12301 neon_vector_mem_operand (rtx op, int type, bool strict)
12302 {
12303 rtx ind;
12304
12305 /* Reject eliminable registers. */
12306 if (strict && ! (reload_in_progress || reload_completed)
12307 && (reg_mentioned_p (frame_pointer_rtx, op)
12308 || reg_mentioned_p (arg_pointer_rtx, op)
12309 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12310 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12311 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12312 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12313 return FALSE;
12314
12315 /* Constants are converted into offsets from labels. */
12316 if (!MEM_P (op))
12317 return FALSE;
12318
12319 ind = XEXP (op, 0);
12320
12321 if (reload_completed
12322 && (GET_CODE (ind) == LABEL_REF
12323 || (GET_CODE (ind) == CONST
12324 && GET_CODE (XEXP (ind, 0)) == PLUS
12325 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12326 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12327 return TRUE;
12328
12329 /* Match: (mem (reg)). */
12330 if (REG_P (ind))
12331 return arm_address_register_rtx_p (ind, 0);
12332
12333 /* Allow post-increment with Neon registers. */
12334 if ((type != 1 && GET_CODE (ind) == POST_INC)
12335 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12336 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12337
12338 /* Allow post-increment by register for VLDn */
12339 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12340 && GET_CODE (XEXP (ind, 1)) == PLUS
12341 && REG_P (XEXP (XEXP (ind, 1), 1)))
12342 return true;
12343
12344 /* Match:
12345 (plus (reg)
12346 (const)). */
12347 if (type == 0
12348 && GET_CODE (ind) == PLUS
12349 && REG_P (XEXP (ind, 0))
12350 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12351 && CONST_INT_P (XEXP (ind, 1))
12352 && INTVAL (XEXP (ind, 1)) > -1024
12353 /* For quad modes, we restrict the constant offset to be slightly less
12354 than what the instruction format permits. We have no such constraint
12355 on double mode offsets. (This must match arm_legitimate_index_p.) */
12356 && (INTVAL (XEXP (ind, 1))
12357 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12358 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12359 return TRUE;
12360
12361 return FALSE;
12362 }
12363
12364 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12365 type. */
12366 int
12367 neon_struct_mem_operand (rtx op)
12368 {
12369 rtx ind;
12370
12371 /* Reject eliminable registers. */
12372 if (! (reload_in_progress || reload_completed)
12373 && ( reg_mentioned_p (frame_pointer_rtx, op)
12374 || reg_mentioned_p (arg_pointer_rtx, op)
12375 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12376 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12377 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12378 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12379 return FALSE;
12380
12381 /* Constants are converted into offsets from labels. */
12382 if (!MEM_P (op))
12383 return FALSE;
12384
12385 ind = XEXP (op, 0);
12386
12387 if (reload_completed
12388 && (GET_CODE (ind) == LABEL_REF
12389 || (GET_CODE (ind) == CONST
12390 && GET_CODE (XEXP (ind, 0)) == PLUS
12391 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12392 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12393 return TRUE;
12394
12395 /* Match: (mem (reg)). */
12396 if (REG_P (ind))
12397 return arm_address_register_rtx_p (ind, 0);
12398
12399 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12400 if (GET_CODE (ind) == POST_INC
12401 || GET_CODE (ind) == PRE_DEC)
12402 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12403
12404 return FALSE;
12405 }
12406
12407 /* Return true if X is a register that will be eliminated later on. */
12408 int
12409 arm_eliminable_register (rtx x)
12410 {
12411 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12412 || REGNO (x) == ARG_POINTER_REGNUM
12413 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12414 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12415 }
12416
12417 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12418 coprocessor registers. Otherwise return NO_REGS. */
12419
12420 enum reg_class
12421 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12422 {
12423 if (mode == HFmode)
12424 {
12425 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12426 return GENERAL_REGS;
12427 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12428 return NO_REGS;
12429 return GENERAL_REGS;
12430 }
12431
12432 /* The neon move patterns handle all legitimate vector and struct
12433 addresses. */
12434 if (TARGET_NEON
12435 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12436 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12437 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12438 || VALID_NEON_STRUCT_MODE (mode)))
12439 return NO_REGS;
12440
12441 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12442 return NO_REGS;
12443
12444 return GENERAL_REGS;
12445 }
12446
12447 /* Values which must be returned in the most-significant end of the return
12448 register. */
12449
12450 static bool
12451 arm_return_in_msb (const_tree valtype)
12452 {
12453 return (TARGET_AAPCS_BASED
12454 && BYTES_BIG_ENDIAN
12455 && (AGGREGATE_TYPE_P (valtype)
12456 || TREE_CODE (valtype) == COMPLEX_TYPE
12457 || FIXED_POINT_TYPE_P (valtype)));
12458 }
12459
12460 /* Return TRUE if X references a SYMBOL_REF. */
12461 int
12462 symbol_mentioned_p (rtx x)
12463 {
12464 const char * fmt;
12465 int i;
12466
12467 if (GET_CODE (x) == SYMBOL_REF)
12468 return 1;
12469
12470 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12471 are constant offsets, not symbols. */
12472 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12473 return 0;
12474
12475 fmt = GET_RTX_FORMAT (GET_CODE (x));
12476
12477 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12478 {
12479 if (fmt[i] == 'E')
12480 {
12481 int j;
12482
12483 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12484 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12485 return 1;
12486 }
12487 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12488 return 1;
12489 }
12490
12491 return 0;
12492 }
12493
12494 /* Return TRUE if X references a LABEL_REF. */
12495 int
12496 label_mentioned_p (rtx x)
12497 {
12498 const char * fmt;
12499 int i;
12500
12501 if (GET_CODE (x) == LABEL_REF)
12502 return 1;
12503
12504 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12505 instruction, but they are constant offsets, not symbols. */
12506 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12507 return 0;
12508
12509 fmt = GET_RTX_FORMAT (GET_CODE (x));
12510 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12511 {
12512 if (fmt[i] == 'E')
12513 {
12514 int j;
12515
12516 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12517 if (label_mentioned_p (XVECEXP (x, i, j)))
12518 return 1;
12519 }
12520 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12521 return 1;
12522 }
12523
12524 return 0;
12525 }
12526
12527 int
12528 tls_mentioned_p (rtx x)
12529 {
12530 switch (GET_CODE (x))
12531 {
12532 case CONST:
12533 return tls_mentioned_p (XEXP (x, 0));
12534
12535 case UNSPEC:
12536 if (XINT (x, 1) == UNSPEC_TLS)
12537 return 1;
12538
12539 /* Fall through. */
12540 default:
12541 return 0;
12542 }
12543 }
12544
12545 /* Must not copy any rtx that uses a pc-relative address.
12546 Also, disallow copying of load-exclusive instructions that
12547 may appear after splitting of compare-and-swap-style operations
12548 so as to prevent those loops from being transformed away from their
12549 canonical forms (see PR 69904). */
12550
12551 static bool
12552 arm_cannot_copy_insn_p (rtx_insn *insn)
12553 {
12554 /* The tls call insn cannot be copied, as it is paired with a data
12555 word. */
12556 if (recog_memoized (insn) == CODE_FOR_tlscall)
12557 return true;
12558
12559 subrtx_iterator::array_type array;
12560 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12561 {
12562 const_rtx x = *iter;
12563 if (GET_CODE (x) == UNSPEC
12564 && (XINT (x, 1) == UNSPEC_PIC_BASE
12565 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12566 return true;
12567 }
12568
12569 rtx set = single_set (insn);
12570 if (set)
12571 {
12572 rtx src = SET_SRC (set);
12573 if (GET_CODE (src) == ZERO_EXTEND)
12574 src = XEXP (src, 0);
12575
12576 /* Catch the load-exclusive and load-acquire operations. */
12577 if (GET_CODE (src) == UNSPEC_VOLATILE
12578 && (XINT (src, 1) == VUNSPEC_LL
12579 || XINT (src, 1) == VUNSPEC_LAX))
12580 return true;
12581 }
12582 return false;
12583 }
12584
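/* Return the comparison code corresponding to the min/max operation X,
   i.e. the condition under which its first operand would be selected:
   GE for SMAX, LE for SMIN, GEU for UMAX and LEU for UMIN.  */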
12585 enum rtx_code
12586 minmax_code (rtx x)
12587 {
12588 enum rtx_code code = GET_CODE (x);
12589
12590 switch (code)
12591 {
12592 case SMAX:
12593 return GE;
12594 case SMIN:
12595 return LE;
12596 case UMIN:
12597 return LEU;
12598 case UMAX:
12599 return GEU;
12600 default:
12601 gcc_unreachable ();
12602 }
12603 }
12604
12605 /* Match pair of min/max operators that can be implemented via usat/ssat. */
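/* For example, clamping a value to [0, 255] (LO_BOUND == 0, HI_BOUND == 255)
   matches USAT with *MASK == 8, while clamping to [-256, 255] matches SSAT
   with *MASK == 9.  */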
12606
12607 bool
12608 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12609 int *mask, bool *signed_sat)
12610 {
12611 /* The high bound must be a power of two minus one. */
12612 int log = exact_log2 (INTVAL (hi_bound) + 1);
12613 if (log == -1)
12614 return false;
12615
12616 /* The low bound is either zero (for usat) or one less than the
12617 negation of the high bound (for ssat). */
12618 if (INTVAL (lo_bound) == 0)
12619 {
12620 if (mask)
12621 *mask = log;
12622 if (signed_sat)
12623 *signed_sat = false;
12624
12625 return true;
12626 }
12627
12628 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12629 {
12630 if (mask)
12631 *mask = log + 1;
12632 if (signed_sat)
12633 *signed_sat = true;
12634
12635 return true;
12636 }
12637
12638 return false;
12639 }
12640
12641 /* Return 1 if memory locations A and B are adjacent: exactly four bytes apart and addressed from the same base register. */
12642 int
12643 adjacent_mem_locations (rtx a, rtx b)
12644 {
12645 /* We don't guarantee to preserve the order of these memory refs. */
12646 if (volatile_refs_p (a) || volatile_refs_p (b))
12647 return 0;
12648
12649 if ((REG_P (XEXP (a, 0))
12650 || (GET_CODE (XEXP (a, 0)) == PLUS
12651 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12652 && (REG_P (XEXP (b, 0))
12653 || (GET_CODE (XEXP (b, 0)) == PLUS
12654 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12655 {
12656 HOST_WIDE_INT val0 = 0, val1 = 0;
12657 rtx reg0, reg1;
12658 int val_diff;
12659
12660 if (GET_CODE (XEXP (a, 0)) == PLUS)
12661 {
12662 reg0 = XEXP (XEXP (a, 0), 0);
12663 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12664 }
12665 else
12666 reg0 = XEXP (a, 0);
12667
12668 if (GET_CODE (XEXP (b, 0)) == PLUS)
12669 {
12670 reg1 = XEXP (XEXP (b, 0), 0);
12671 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12672 }
12673 else
12674 reg1 = XEXP (b, 0);
12675
12676 /* Don't accept any offset that will require multiple
12677 instructions to handle, since this would cause the
12678 arith_adjacentmem pattern to output an overlong sequence. */
12679 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12680 return 0;
12681
12682 /* Don't allow an eliminable register: register elimination can make
12683 the offset too large. */
12684 if (arm_eliminable_register (reg0))
12685 return 0;
12686
12687 val_diff = val1 - val0;
12688
12689 if (arm_ld_sched)
12690 {
12691 /* If the target has load delay slots, then there's no benefit
12692 to using an ldm instruction unless we are optimizing for size
12693 and one of the offsets is zero or four. */
12694 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12695 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12696 && (val_diff == 4 || val_diff == -4));
12697 }
12698
12699 return ((REGNO (reg0) == REGNO (reg1))
12700 && (val_diff == 4 || val_diff == -4));
12701 }
12702
12703 return 0;
12704 }
12705
12706 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12707 for load operations, false for store operations. CONSECUTIVE is true
12708 if the register numbers in the operation must be consecutive in the register
12709 bank. RETURN_PC is true if the value is to be loaded into the PC.
12710 The pattern we are trying to match for load is:
12711 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12712 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12713 :
12714 :
12715 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12716 ]
12717 where
12718 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12719 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12720 3. If consecutive is TRUE, then for kth register being loaded,
12721 REGNO (R_dk) = REGNO (R_d0) + k.
12722 The pattern for store is similar. */
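/* As an illustrative sketch (the precise RTL comes from the ldm/stm patterns
   in arm.md), a write-back "ldm r0!, {r4, r5}" in SImode would be matched as
     [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
      (set (reg:SI r4) (mem:SI (reg:SI r0)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))].  */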
12723 bool
12724 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12725 bool consecutive, bool return_pc)
12726 {
12727 HOST_WIDE_INT count = XVECLEN (op, 0);
12728 rtx reg, mem, addr;
12729 unsigned regno;
12730 unsigned first_regno;
12731 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12732 rtx elt;
12733 bool addr_reg_in_reglist = false;
12734 bool update = false;
12735 int reg_increment;
12736 int offset_adj;
12737 int regs_per_val;
12738
12739 /* If not in SImode, then registers must be consecutive
12740 (e.g., VLDM instructions for DFmode). */
12741 gcc_assert ((mode == SImode) || consecutive);
12742 /* Setting return_pc for stores is illegal. */
12743 gcc_assert (!return_pc || load);
12744
12745 /* Set up the increments and the regs per val based on the mode. */
12746 reg_increment = GET_MODE_SIZE (mode);
12747 regs_per_val = reg_increment / 4;
12748 offset_adj = return_pc ? 1 : 0;
12749
12750 if (count <= 1
12751 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12752 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12753 return false;
12754
12755 /* Check if this is a write-back. */
12756 elt = XVECEXP (op, 0, offset_adj);
12757 if (GET_CODE (SET_SRC (elt)) == PLUS)
12758 {
12759 i++;
12760 base = 1;
12761 update = true;
12762
12763 /* The offset adjustment must be the number of registers being
12764 popped times the size of a single register. */
12765 if (!REG_P (SET_DEST (elt))
12766 || !REG_P (XEXP (SET_SRC (elt), 0))
12767 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12768 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12769 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12770 ((count - 1 - offset_adj) * reg_increment))
12771 return false;
12772 }
12773
12774 i = i + offset_adj;
12775 base = base + offset_adj;
12776 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12777 success depends on the type: VLDM can do just one reg,
12778 LDM must do at least two. */
12779 if ((count <= i) && (mode == SImode))
12780 return false;
12781
12782 elt = XVECEXP (op, 0, i - 1);
12783 if (GET_CODE (elt) != SET)
12784 return false;
12785
12786 if (load)
12787 {
12788 reg = SET_DEST (elt);
12789 mem = SET_SRC (elt);
12790 }
12791 else
12792 {
12793 reg = SET_SRC (elt);
12794 mem = SET_DEST (elt);
12795 }
12796
12797 if (!REG_P (reg) || !MEM_P (mem))
12798 return false;
12799
12800 regno = REGNO (reg);
12801 first_regno = regno;
12802 addr = XEXP (mem, 0);
12803 if (GET_CODE (addr) == PLUS)
12804 {
12805 if (!CONST_INT_P (XEXP (addr, 1)))
12806 return false;
12807
12808 offset = INTVAL (XEXP (addr, 1));
12809 addr = XEXP (addr, 0);
12810 }
12811
12812 if (!REG_P (addr))
12813 return false;
12814
12815 /* Don't allow SP to be loaded unless it is also the base register. It
12816 guarantees that SP is reset correctly when an LDM instruction
12817 is interrupted. Otherwise, we might end up with a corrupt stack. */
12818 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12819 return false;
12820
12821 for (; i < count; i++)
12822 {
12823 elt = XVECEXP (op, 0, i);
12824 if (GET_CODE (elt) != SET)
12825 return false;
12826
12827 if (load)
12828 {
12829 reg = SET_DEST (elt);
12830 mem = SET_SRC (elt);
12831 }
12832 else
12833 {
12834 reg = SET_SRC (elt);
12835 mem = SET_DEST (elt);
12836 }
12837
12838 if (!REG_P (reg)
12839 || GET_MODE (reg) != mode
12840 || REGNO (reg) <= regno
12841 || (consecutive
12842 && (REGNO (reg) !=
12843 (unsigned int) (first_regno + regs_per_val * (i - base))))
12844 /* Don't allow SP to be loaded unless it is also the base register. It
12845 guarantees that SP is reset correctly when an LDM instruction
12846 is interrupted. Otherwise, we might end up with a corrupt stack. */
12847 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12848 || !MEM_P (mem)
12849 || GET_MODE (mem) != mode
12850 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12851 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12852 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12853 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12854 offset + (i - base) * reg_increment))
12855 && (!REG_P (XEXP (mem, 0))
12856 || offset + (i - base) * reg_increment != 0)))
12857 return false;
12858
12859 regno = REGNO (reg);
12860 if (regno == REGNO (addr))
12861 addr_reg_in_reglist = true;
12862 }
12863
12864 if (load)
12865 {
12866 if (update && addr_reg_in_reglist)
12867 return false;
12868
12869 /* For Thumb-1, the address register is always modified, either by write-back
12870 or by an explicit load. If the pattern does not describe an update,
12871 then the address register must be in the list of loaded registers. */
12872 if (TARGET_THUMB1)
12873 return update || addr_reg_in_reglist;
12874 }
12875
12876 return true;
12877 }
12878
12879 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12880 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12881 instruction. ADD_OFFSET is nonzero if the base address register needs
12882 to be modified with an add instruction before we can use it. */
12883
12884 static bool
12885 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12886 int nops, HOST_WIDE_INT add_offset)
12887 {
12888 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12889 if the offset isn't small enough. The reason 2 ldrs are faster
12890 is because these ARMs are able to do more than one cache access
12891 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12892 whilst the ARM8 has a double bandwidth cache. This means that
12893 these cores can do both an instruction fetch and a data fetch in
12894 a single cycle, so the trick of calculating the address into a
12895 scratch register (one of the result regs) and then doing a load
12896 multiple actually becomes slower (and no smaller in code size).
12897 That is the transformation
12898
12899 ldr rd1, [rbase + offset]
12900 ldr rd2, [rbase + offset + 4]
12901
12902 to
12903
12904 add rd1, rbase, offset
12905 ldmia rd1, {rd1, rd2}
12906
12907 produces worse code -- '3 cycles + any stalls on rd2' instead of
12908 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12909 access per cycle, the first sequence could never complete in less
12910 than 6 cycles, whereas the ldm sequence would only take 5 and
12911 would make better use of sequential accesses if not hitting the
12912 cache.
12913
12914 We cheat here and test 'arm_ld_sched' which we currently know to
12915 only be true for the ARM8, ARM9 and StrongARM. If this ever
12916 changes, then the test below needs to be reworked. */
12917 if (nops == 2 && arm_ld_sched && add_offset != 0)
12918 return false;
12919
12920 /* XScale has load-store double instructions, but they have stricter
12921 alignment requirements than load-store multiple, so we cannot
12922 use them.
12923
12924 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12925 the pipeline until completion.
12926
12927      NREGS    CYCLES
12928        1         3
12929        2         4
12930        3         5
12931        4         6
12932
12933 An ldr instruction takes 1-3 cycles, but does not block the
12934 pipeline.
12935
12936      NREGS    CYCLES
12937        1        1-3
12938        2        2-6
12939        3        3-9
12940        4        4-12
12941
12942 Best case ldr will always win. However, the more ldr instructions
12943 we issue, the less likely we are to be able to schedule them well.
12944 Using ldr instructions also increases code size.
12945
12946 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12947 for counts of 3 or 4 regs. */
12948 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12949 return false;
12950 return true;
12951 }
12952
12953 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12954 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12955 an array ORDER which describes the sequence to use when accessing the
12956 offsets that produces an ascending order. In this sequence, each
12957 offset must be larger by exactly 4 than the previous one. ORDER[0]
12958 must have been filled in with the lowest offset by the caller.
12959 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12960 we use to verify that ORDER produces an ascending order of registers.
12961 Return true if it was possible to construct such an order, false if
12962 not. */
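/* For example, with UNSORTED_OFFSETS == {8, 0, 4, 12} and ORDER[0] == 1
   (the index of the lowest offset), this computes ORDER == {1, 2, 0, 3}.  */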
12963
12964 static bool
12965 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12966 int *unsorted_regs)
12967 {
12968 int i;
12969 for (i = 1; i < nops; i++)
12970 {
12971 int j;
12972
12973 order[i] = order[i - 1];
12974 for (j = 0; j < nops; j++)
12975 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12976 {
12977 /* We must find exactly one offset that is higher than the
12978 previous one by 4. */
12979 if (order[i] != order[i - 1])
12980 return false;
12981 order[i] = j;
12982 }
12983 if (order[i] == order[i - 1])
12984 return false;
12985 /* The register numbers must be ascending. */
12986 if (unsorted_regs != NULL
12987 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
12988 return false;
12989 }
12990 return true;
12991 }
12992
12993 /* Used to determine in a peephole whether a sequence of load
12994 instructions can be changed into a load-multiple instruction.
12995 NOPS is the number of separate load instructions we are examining. The
12996 first NOPS entries in OPERANDS are the destination registers, the
12997 next NOPS entries are memory operands. If this function is
12998 successful, *BASE is set to the common base register of the memory
12999 accesses; *LOAD_OFFSET is set to the first memory location's offset
13000 from that base register.
13001 REGS is an array filled in with the destination register numbers.
13002 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13003 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13004 the sequence of registers in REGS matches the loads from ascending memory
13005 locations, and the function verifies that the register numbers are
13006 themselves ascending. If CHECK_REGS is false, the register numbers
13007 are stored in the order they are found in the operands. */
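/* As a sketch, in ARM state the two loads
     ldr r4, [r0, #4]
     ldr r5, [r0, #8]
   would give REGS == {4, 5}, *BASE == 0 (r0), *LOAD_OFFSET == 4 and a return
   value of 2 (the ldmib case), assuming an LDM is considered profitable for
   the tuned core.  */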
13008 static int
13009 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13010 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13011 {
13012 int unsorted_regs[MAX_LDM_STM_OPS];
13013 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13014 int order[MAX_LDM_STM_OPS];
13015 rtx base_reg_rtx = NULL;
13016 int base_reg = -1;
13017 int i, ldm_case;
13018
13019 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13020 easily extended if required. */
13021 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13022
13023 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13024
13025 /* Loop over the operands and check that the memory references are
13026 suitable (i.e. immediate offsets from the same base register). At
13027 the same time, extract the target register, and the memory
13028 offsets. */
13029 for (i = 0; i < nops; i++)
13030 {
13031 rtx reg;
13032 rtx offset;
13033
13034 /* Convert a subreg of a mem into the mem itself. */
13035 if (GET_CODE (operands[nops + i]) == SUBREG)
13036 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13037
13038 gcc_assert (MEM_P (operands[nops + i]));
13039
13040 /* Don't reorder volatile memory references; it doesn't seem worth
13041 looking for the case where the order is ok anyway. */
13042 if (MEM_VOLATILE_P (operands[nops + i]))
13043 return 0;
13044
13045 offset = const0_rtx;
13046
13047 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13048 || (GET_CODE (reg) == SUBREG
13049 && REG_P (reg = SUBREG_REG (reg))))
13050 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13051 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13052 || (GET_CODE (reg) == SUBREG
13053 && REG_P (reg = SUBREG_REG (reg))))
13054 && (CONST_INT_P (offset
13055 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13056 {
13057 if (i == 0)
13058 {
13059 base_reg = REGNO (reg);
13060 base_reg_rtx = reg;
13061 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13062 return 0;
13063 }
13064 else if (base_reg != (int) REGNO (reg))
13065 /* Not addressed from the same base register. */
13066 return 0;
13067
13068 unsorted_regs[i] = (REG_P (operands[i])
13069 ? REGNO (operands[i])
13070 : REGNO (SUBREG_REG (operands[i])));
13071
13072 /* If it isn't an integer register, or if it overwrites the
13073 base register but isn't the last insn in the list, then
13074 we can't do this. */
13075 if (unsorted_regs[i] < 0
13076 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13077 || unsorted_regs[i] > 14
13078 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13079 return 0;
13080
13081 /* Don't allow SP to be loaded unless it is also the base
13082 register. It guarantees that SP is reset correctly when
13083 an LDM instruction is interrupted. Otherwise, we might
13084 end up with a corrupt stack. */
13085 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13086 return 0;
13087
13088 unsorted_offsets[i] = INTVAL (offset);
13089 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13090 order[0] = i;
13091 }
13092 else
13093 /* Not a suitable memory address. */
13094 return 0;
13095 }
13096
13097 /* All the useful information has now been extracted from the
13098 operands into unsorted_regs and unsorted_offsets; additionally,
13099 order[0] has been set to the lowest offset in the list. Sort
13100 the offsets into order, verifying that they are adjacent, and
13101 check that the register numbers are ascending. */
13102 if (!compute_offset_order (nops, unsorted_offsets, order,
13103 check_regs ? unsorted_regs : NULL))
13104 return 0;
13105
13106 if (saved_order)
13107 memcpy (saved_order, order, sizeof order);
13108
13109 if (base)
13110 {
13111 *base = base_reg;
13112
13113 for (i = 0; i < nops; i++)
13114 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13115
13116 *load_offset = unsorted_offsets[order[0]];
13117 }
13118
13119 if (TARGET_THUMB1
13120 && !peep2_reg_dead_p (nops, base_reg_rtx))
13121 return 0;
13122
13123 if (unsorted_offsets[order[0]] == 0)
13124 ldm_case = 1; /* ldmia */
13125 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13126 ldm_case = 2; /* ldmib */
13127 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13128 ldm_case = 3; /* ldmda */
13129 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13130 ldm_case = 4; /* ldmdb */
13131 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13132 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13133 ldm_case = 5;
13134 else
13135 return 0;
13136
13137 if (!multiple_operation_profitable_p (false, nops,
13138 ldm_case == 5
13139 ? unsorted_offsets[order[0]] : 0))
13140 return 0;
13141
13142 return ldm_case;
13143 }
13144
13145 /* Used to determine in a peephole whether a sequence of store instructions can
13146 be changed into a store-multiple instruction.
13147 NOPS is the number of separate store instructions we are examining.
13148 NOPS_TOTAL is the total number of instructions recognized by the peephole
13149 pattern.
13150 The first NOPS entries in OPERANDS are the source registers, the next
13151 NOPS entries are memory operands. If this function is successful, *BASE is
13152 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13153 to the first memory location's offset from that base register. REGS is an
13154 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13155 likewise filled with the corresponding rtx's.
13156 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13157 numbers to an ascending order of stores.
13158 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13159 from ascending memory locations, and the function verifies that the register
13160 numbers are themselves ascending. If CHECK_REGS is false, the register
13161 numbers are stored in the order they are found in the operands. */
13162 static int
13163 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13164 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13165 HOST_WIDE_INT *load_offset, bool check_regs)
13166 {
13167 int unsorted_regs[MAX_LDM_STM_OPS];
13168 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13169 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13170 int order[MAX_LDM_STM_OPS];
13171 int base_reg = -1;
13172 rtx base_reg_rtx = NULL;
13173 int i, stm_case;
13174
13175 /* Write back of base register is currently only supported for Thumb 1. */
13176 int base_writeback = TARGET_THUMB1;
13177
13178 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13179 easily extended if required. */
13180 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13181
13182 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13183
13184 /* Loop over the operands and check that the memory references are
13185 suitable (i.e. immediate offsets from the same base register). At
13186 the same time, extract the target register, and the memory
13187 offsets. */
13188 for (i = 0; i < nops; i++)
13189 {
13190 rtx reg;
13191 rtx offset;
13192
13193 /* Convert a subreg of a mem into the mem itself. */
13194 if (GET_CODE (operands[nops + i]) == SUBREG)
13195 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13196
13197 gcc_assert (MEM_P (operands[nops + i]));
13198
13199 /* Don't reorder volatile memory references; it doesn't seem worth
13200 looking for the case where the order is ok anyway. */
13201 if (MEM_VOLATILE_P (operands[nops + i]))
13202 return 0;
13203
13204 offset = const0_rtx;
13205
13206 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13207 || (GET_CODE (reg) == SUBREG
13208 && REG_P (reg = SUBREG_REG (reg))))
13209 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13210 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13211 || (GET_CODE (reg) == SUBREG
13212 && REG_P (reg = SUBREG_REG (reg))))
13213 && (CONST_INT_P (offset
13214 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13215 {
13216 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13217 ? operands[i] : SUBREG_REG (operands[i]));
13218 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13219
13220 if (i == 0)
13221 {
13222 base_reg = REGNO (reg);
13223 base_reg_rtx = reg;
13224 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13225 return 0;
13226 }
13227 else if (base_reg != (int) REGNO (reg))
13228 /* Not addressed from the same base register. */
13229 return 0;
13230
13231 /* If it isn't an integer register, then we can't do this. */
13232 if (unsorted_regs[i] < 0
13233 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13234 /* The effects are unpredictable if the base register is
13235 both updated and stored. */
13236 || (base_writeback && unsorted_regs[i] == base_reg)
13237 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13238 || unsorted_regs[i] > 14)
13239 return 0;
13240
13241 unsorted_offsets[i] = INTVAL (offset);
13242 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13243 order[0] = i;
13244 }
13245 else
13246 /* Not a suitable memory address. */
13247 return 0;
13248 }
13249
13250 /* All the useful information has now been extracted from the
13251 operands into unsorted_regs and unsorted_offsets; additionally,
13252 order[0] has been set to the lowest offset in the list. Sort
13253 the offsets into order, verifying that they are adjacent, and
13254 check that the register numbers are ascending. */
13255 if (!compute_offset_order (nops, unsorted_offsets, order,
13256 check_regs ? unsorted_regs : NULL))
13257 return 0;
13258
13259 if (saved_order)
13260 memcpy (saved_order, order, sizeof order);
13261
13262 if (base)
13263 {
13264 *base = base_reg;
13265
13266 for (i = 0; i < nops; i++)
13267 {
13268 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13269 if (reg_rtxs)
13270 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13271 }
13272
13273 *load_offset = unsorted_offsets[order[0]];
13274 }
13275
13276 if (TARGET_THUMB1
13277 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13278 return 0;
13279
13280 if (unsorted_offsets[order[0]] == 0)
13281 stm_case = 1; /* stmia */
13282 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13283 stm_case = 2; /* stmib */
13284 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13285 stm_case = 3; /* stmda */
13286 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13287 stm_case = 4; /* stmdb */
13288 else
13289 return 0;
13290
13291 if (!multiple_operation_profitable_p (false, nops, 0))
13292 return 0;
13293
13294 return stm_case;
13295 }
13296 \f
13297 /* Routines for use in generating RTL. */
13298
13299 /* Generate a load-multiple instruction. COUNT is the number of loads in
13300 the instruction; REGS and MEMS are arrays containing the operands.
13301 BASEREG is the base register to be used in addressing the memory operands.
13302 WBACK_OFFSET is nonzero if the instruction should update the base
13303 register. */
13304
13305 static rtx
13306 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13307 HOST_WIDE_INT wback_offset)
13308 {
13309 int i = 0, j;
13310 rtx result;
13311
13312 if (!multiple_operation_profitable_p (false, count, 0))
13313 {
13314 rtx seq;
13315
13316 start_sequence ();
13317
13318 for (i = 0; i < count; i++)
13319 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13320
13321 if (wback_offset != 0)
13322 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13323
13324 seq = get_insns ();
13325 end_sequence ();
13326
13327 return seq;
13328 }
13329
13330 result = gen_rtx_PARALLEL (VOIDmode,
13331 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13332 if (wback_offset != 0)
13333 {
13334 XVECEXP (result, 0, 0)
13335 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13336 i = 1;
13337 count++;
13338 }
13339
13340 for (j = 0; i < count; i++, j++)
13341 XVECEXP (result, 0, i)
13342 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13343
13344 return result;
13345 }
13346
13347 /* Generate a store-multiple instruction. COUNT is the number of stores in
13348 the instruction; REGS and MEMS are arrays containing the operands.
13349 BASEREG is the base register to be used in addressing the memory operands.
13350 WBACK_OFFSET is nonzero if the instruction should update the base
13351 register. */
13352
13353 static rtx
13354 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13355 HOST_WIDE_INT wback_offset)
13356 {
13357 int i = 0, j;
13358 rtx result;
13359
13360 if (GET_CODE (basereg) == PLUS)
13361 basereg = XEXP (basereg, 0);
13362
13363 if (!multiple_operation_profitable_p (false, count, 0))
13364 {
13365 rtx seq;
13366
13367 start_sequence ();
13368
13369 for (i = 0; i < count; i++)
13370 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13371
13372 if (wback_offset != 0)
13373 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13374
13375 seq = get_insns ();
13376 end_sequence ();
13377
13378 return seq;
13379 }
13380
13381 result = gen_rtx_PARALLEL (VOIDmode,
13382 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13383 if (wback_offset != 0)
13384 {
13385 XVECEXP (result, 0, 0)
13386 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13387 i = 1;
13388 count++;
13389 }
13390
13391 for (j = 0; i < count; i++, j++)
13392 XVECEXP (result, 0, i)
13393 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13394
13395 return result;
13396 }
13397
13398 /* Generate either a load-multiple or a store-multiple instruction. This
13399 function can be used in situations where we can start with a single MEM
13400 rtx and adjust its address upwards.
13401 COUNT is the number of operations in the instruction, not counting a
13402 possible update of the base register. REGS is an array containing the
13403 register operands.
13404 BASEREG is the base register to be used in addressing the memory operands,
13405 which are constructed from BASEMEM.
13406 WRITE_BACK specifies whether the generated instruction should include an
13407 update of the base register.
13408 OFFSETP is used to pass an offset to and from this function; this offset
13409 is not used when constructing the address (instead BASEMEM should have an
13410 appropriate offset in its address), it is used only for setting
13411 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13412
13413 static rtx
13414 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13415 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13416 {
13417 rtx mems[MAX_LDM_STM_OPS];
13418 HOST_WIDE_INT offset = *offsetp;
13419 int i;
13420
13421 gcc_assert (count <= MAX_LDM_STM_OPS);
13422
13423 if (GET_CODE (basereg) == PLUS)
13424 basereg = XEXP (basereg, 0);
13425
13426 for (i = 0; i < count; i++)
13427 {
13428 rtx addr = plus_constant (Pmode, basereg, i * 4);
13429 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13430 offset += 4;
13431 }
13432
13433 if (write_back)
13434 *offsetp = offset;
13435
13436 if (is_load)
13437 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13438 write_back ? 4 * count : 0);
13439 else
13440 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13441 write_back ? 4 * count : 0);
13442 }
13443
13444 rtx
13445 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13446 rtx basemem, HOST_WIDE_INT *offsetp)
13447 {
13448 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13449 offsetp);
13450 }
13451
13452 rtx
13453 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13454 rtx basemem, HOST_WIDE_INT *offsetp)
13455 {
13456 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13457 offsetp);
13458 }
13459
13460 /* Called from a peephole2 expander to turn a sequence of loads into an
13461 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13462 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13463 is true if we can reorder the registers because their subsequent uses are
13464 commutative.
13465 Returns true iff we could generate a new instruction. */
13466
13467 bool
13468 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13469 {
13470 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13471 rtx mems[MAX_LDM_STM_OPS];
13472 int i, j, base_reg;
13473 rtx base_reg_rtx;
13474 HOST_WIDE_INT offset;
13475 int write_back = FALSE;
13476 int ldm_case;
13477 rtx addr;
13478
13479 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13480 &base_reg, &offset, !sort_regs);
13481
13482 if (ldm_case == 0)
13483 return false;
13484
13485 if (sort_regs)
13486 for (i = 0; i < nops - 1; i++)
13487 for (j = i + 1; j < nops; j++)
13488 if (regs[i] > regs[j])
13489 {
13490 int t = regs[i];
13491 regs[i] = regs[j];
13492 regs[j] = t;
13493 }
13494 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13495
13496 if (TARGET_THUMB1)
13497 {
13498 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13499 gcc_assert (ldm_case == 1 || ldm_case == 5);
13500 write_back = TRUE;
13501 }
13502
13503 if (ldm_case == 5)
13504 {
13505 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13506 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13507 offset = 0;
13508 if (!TARGET_THUMB1)
13509 {
13510 base_reg = regs[0];
13511 base_reg_rtx = newbase;
13512 }
13513 }
13514
13515 for (i = 0; i < nops; i++)
13516 {
13517 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13518 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13519 SImode, addr, 0);
13520 }
13521 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13522 write_back ? offset + i * 4 : 0));
13523 return true;
13524 }
13525
13526 /* Called from a peephole2 expander to turn a sequence of stores into an
13527 STM instruction. OPERANDS are the operands found by the peephole matcher;
13528 NOPS indicates how many separate stores we are trying to combine.
13529 Returns true iff we could generate a new instruction. */
13530
13531 bool
13532 gen_stm_seq (rtx *operands, int nops)
13533 {
13534 int i;
13535 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13536 rtx mems[MAX_LDM_STM_OPS];
13537 int base_reg;
13538 rtx base_reg_rtx;
13539 HOST_WIDE_INT offset;
13540 int write_back = FALSE;
13541 int stm_case;
13542 rtx addr;
13543 bool base_reg_dies;
13544
13545 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13546 mem_order, &base_reg, &offset, true);
13547
13548 if (stm_case == 0)
13549 return false;
13550
13551 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13552
13553 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13554 if (TARGET_THUMB1)
13555 {
13556 gcc_assert (base_reg_dies);
13557 write_back = TRUE;
13558 }
13559
13560 if (stm_case == 5)
13561 {
13562 gcc_assert (base_reg_dies);
13563 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13564 offset = 0;
13565 }
13566
13567 addr = plus_constant (Pmode, base_reg_rtx, offset);
13568
13569 for (i = 0; i < nops; i++)
13570 {
13571 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13572 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13573 SImode, addr, 0);
13574 }
13575 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13576 write_back ? offset + i * 4 : 0));
13577 return true;
13578 }
13579
13580 /* Called from a peephole2 expander to turn a sequence of stores that are
13581 preceded by constant loads into an STM instruction. OPERANDS are the
13582 operands found by the peephole matcher; NOPS indicates how many
13583 separate stores we are trying to combine; there are 2 * NOPS
13584 instructions in the peephole.
13585 Returns true iff we could generate a new instruction. */
13586
13587 bool
13588 gen_const_stm_seq (rtx *operands, int nops)
13589 {
13590 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13591 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13592 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13593 rtx mems[MAX_LDM_STM_OPS];
13594 int base_reg;
13595 rtx base_reg_rtx;
13596 HOST_WIDE_INT offset;
13597 int write_back = FALSE;
13598 int stm_case;
13599 rtx addr;
13600 bool base_reg_dies;
13601 int i, j;
13602 HARD_REG_SET allocated;
13603
13604 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13605 mem_order, &base_reg, &offset, false);
13606
13607 if (stm_case == 0)
13608 return false;
13609
13610 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13611
13612 /* If the same register is used more than once, try to find a free
13613 register. */
13614 CLEAR_HARD_REG_SET (allocated);
13615 for (i = 0; i < nops; i++)
13616 {
13617 for (j = i + 1; j < nops; j++)
13618 if (regs[i] == regs[j])
13619 {
13620 rtx t = peep2_find_free_register (0, nops * 2,
13621 TARGET_THUMB1 ? "l" : "r",
13622 SImode, &allocated);
13623 if (t == NULL_RTX)
13624 return false;
13625 reg_rtxs[i] = t;
13626 regs[i] = REGNO (t);
13627 }
13628 }
13629
13630 /* Compute an ordering that maps the register numbers to an ascending
13631 sequence. */
13632 reg_order[0] = 0;
13633 for (i = 0; i < nops; i++)
13634 if (regs[i] < regs[reg_order[0]])
13635 reg_order[0] = i;
13636
13637 for (i = 1; i < nops; i++)
13638 {
13639 int this_order = reg_order[i - 1];
13640 for (j = 0; j < nops; j++)
13641 if (regs[j] > regs[reg_order[i - 1]]
13642 && (this_order == reg_order[i - 1]
13643 || regs[j] < regs[this_order]))
13644 this_order = j;
13645 reg_order[i] = this_order;
13646 }
13647
13648 /* Ensure that registers that must be live after the instruction end
13649 up with the correct value. */
13650 for (i = 0; i < nops; i++)
13651 {
13652 int this_order = reg_order[i];
13653 if ((this_order != mem_order[i]
13654 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13655 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13656 return false;
13657 }
13658
13659 /* Load the constants. */
13660 for (i = 0; i < nops; i++)
13661 {
13662 rtx op = operands[2 * nops + mem_order[i]];
13663 sorted_regs[i] = regs[reg_order[i]];
13664 emit_move_insn (reg_rtxs[reg_order[i]], op);
13665 }
13666
13667 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13668
13669 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13670 if (TARGET_THUMB1)
13671 {
13672 gcc_assert (base_reg_dies);
13673 write_back = TRUE;
13674 }
13675
13676 if (stm_case == 5)
13677 {
13678 gcc_assert (base_reg_dies);
13679 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13680 offset = 0;
13681 }
13682
13683 addr = plus_constant (Pmode, base_reg_rtx, offset);
13684
13685 for (i = 0; i < nops; i++)
13686 {
13687 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13688 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13689 SImode, addr, 0);
13690 }
13691 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13692 write_back ? offset + i * 4 : 0));
13693 return true;
13694 }
13695
13696 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13697 unaligned copies on processors which support unaligned semantics for those
13698 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13699 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13700 An interleave factor of 1 (the minimum) will perform no interleaving.
13701 Load/store multiple are used for aligned addresses where possible. */
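/* For example, with INTERLEAVE_FACTOR == 2 a 23-byte copy is emitted as two
   8-byte (two-word) chunks, one remaining word, one halfword and one final
   byte, with the halfword/byte tail handled at the end of this function.  */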
13702
13703 static void
13704 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13705 HOST_WIDE_INT length,
13706 unsigned int interleave_factor)
13707 {
13708 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13709 int *regnos = XALLOCAVEC (int, interleave_factor);
13710 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13711 HOST_WIDE_INT i, j;
13712 HOST_WIDE_INT remaining = length, words;
13713 rtx halfword_tmp = NULL, byte_tmp = NULL;
13714 rtx dst, src;
13715 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13716 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13717 HOST_WIDE_INT srcoffset, dstoffset;
13718 HOST_WIDE_INT src_autoinc, dst_autoinc;
13719 rtx mem, addr;
13720
13721 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13722
13723 /* Use hard registers if we have aligned source or destination so we can use
13724 load/store multiple with contiguous registers. */
13725 if (dst_aligned || src_aligned)
13726 for (i = 0; i < interleave_factor; i++)
13727 regs[i] = gen_rtx_REG (SImode, i);
13728 else
13729 for (i = 0; i < interleave_factor; i++)
13730 regs[i] = gen_reg_rtx (SImode);
13731
13732 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13733 src = copy_addr_to_reg (XEXP (srcbase, 0));
13734
13735 srcoffset = dstoffset = 0;
13736
13737 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13738 For copying the last bytes we want to subtract this offset again. */
13739 src_autoinc = dst_autoinc = 0;
13740
13741 for (i = 0; i < interleave_factor; i++)
13742 regnos[i] = i;
13743
13744 /* Copy BLOCK_SIZE_BYTES chunks. */
13745
13746 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13747 {
13748 /* Load words. */
13749 if (src_aligned && interleave_factor > 1)
13750 {
13751 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13752 TRUE, srcbase, &srcoffset));
13753 src_autoinc += UNITS_PER_WORD * interleave_factor;
13754 }
13755 else
13756 {
13757 for (j = 0; j < interleave_factor; j++)
13758 {
13759 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13760 - src_autoinc));
13761 mem = adjust_automodify_address (srcbase, SImode, addr,
13762 srcoffset + j * UNITS_PER_WORD);
13763 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13764 }
13765 srcoffset += block_size_bytes;
13766 }
13767
13768 /* Store words. */
13769 if (dst_aligned && interleave_factor > 1)
13770 {
13771 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13772 TRUE, dstbase, &dstoffset));
13773 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13774 }
13775 else
13776 {
13777 for (j = 0; j < interleave_factor; j++)
13778 {
13779 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13780 - dst_autoinc));
13781 mem = adjust_automodify_address (dstbase, SImode, addr,
13782 dstoffset + j * UNITS_PER_WORD);
13783 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13784 }
13785 dstoffset += block_size_bytes;
13786 }
13787
13788 remaining -= block_size_bytes;
13789 }
13790
13791 /* Copy any whole words left (note these aren't interleaved with any
13792 subsequent halfword/byte load/stores in the interests of simplicity). */
13793
13794 words = remaining / UNITS_PER_WORD;
13795
13796 gcc_assert (words < interleave_factor);
13797
13798 if (src_aligned && words > 1)
13799 {
13800 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13801 &srcoffset));
13802 src_autoinc += UNITS_PER_WORD * words;
13803 }
13804 else
13805 {
13806 for (j = 0; j < words; j++)
13807 {
13808 addr = plus_constant (Pmode, src,
13809 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13810 mem = adjust_automodify_address (srcbase, SImode, addr,
13811 srcoffset + j * UNITS_PER_WORD);
13812 if (src_aligned)
13813 emit_move_insn (regs[j], mem);
13814 else
13815 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13816 }
13817 srcoffset += words * UNITS_PER_WORD;
13818 }
13819
13820 if (dst_aligned && words > 1)
13821 {
13822 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13823 &dstoffset));
13824 dst_autoinc += words * UNITS_PER_WORD;
13825 }
13826 else
13827 {
13828 for (j = 0; j < words; j++)
13829 {
13830 addr = plus_constant (Pmode, dst,
13831 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13832 mem = adjust_automodify_address (dstbase, SImode, addr,
13833 dstoffset + j * UNITS_PER_WORD);
13834 if (dst_aligned)
13835 emit_move_insn (mem, regs[j]);
13836 else
13837 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13838 }
13839 dstoffset += words * UNITS_PER_WORD;
13840 }
13841
13842 remaining -= words * UNITS_PER_WORD;
13843
13844 gcc_assert (remaining < 4);
13845
13846 /* Copy a halfword if necessary. */
13847
13848 if (remaining >= 2)
13849 {
13850 halfword_tmp = gen_reg_rtx (SImode);
13851
13852 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13853 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13854 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13855
13856 /* Either write out immediately, or delay until we've loaded the last
13857 byte, depending on interleave factor. */
13858 if (interleave_factor == 1)
13859 {
13860 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13861 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13862 emit_insn (gen_unaligned_storehi (mem,
13863 gen_lowpart (HImode, halfword_tmp)));
13864 halfword_tmp = NULL;
13865 dstoffset += 2;
13866 }
13867
13868 remaining -= 2;
13869 srcoffset += 2;
13870 }
13871
13872 gcc_assert (remaining < 2);
13873
13874 /* Copy last byte. */
13875
13876 if ((remaining & 1) != 0)
13877 {
13878 byte_tmp = gen_reg_rtx (SImode);
13879
13880 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13881 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13882 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13883
13884 if (interleave_factor == 1)
13885 {
13886 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13887 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13888 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13889 byte_tmp = NULL;
13890 dstoffset++;
13891 }
13892
13893 remaining--;
13894 srcoffset++;
13895 }
13896
13897 /* Store last halfword if we haven't done so already. */
13898
13899 if (halfword_tmp)
13900 {
13901 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13902 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13903 emit_insn (gen_unaligned_storehi (mem,
13904 gen_lowpart (HImode, halfword_tmp)));
13905 dstoffset += 2;
13906 }
13907
13908 /* Likewise for last byte. */
13909
13910 if (byte_tmp)
13911 {
13912 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13913 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13914 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13915 dstoffset++;
13916 }
13917
13918 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13919 }
13920
13921 /* From mips_adjust_block_mem:
13922
13923 Helper function for doing a loop-based block operation on memory
13924 reference MEM. Each iteration of the loop will operate on LENGTH
13925 bytes of MEM.
13926
13927 Create a new base register for use within the loop and point it to
13928 the start of MEM. Create a new memory reference that uses this
13929 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13930
13931 static void
13932 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13933 rtx *loop_mem)
13934 {
13935 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13936
13937 /* Although the new mem does not refer to a known location,
13938 it does keep up to LENGTH bytes of alignment. */
13939 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13940 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13941 }
13942
13943 /* From mips_block_move_loop:
13944
13945 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13946 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13947 the memory regions do not overlap. */
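/* For example, LENGTH == 100 with BYTES_PER_ITER == 16 produces a loop that
   copies 96 bytes in six 16-byte iterations, followed by a straight-line copy
   of the 4 leftover bytes.  */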
13948
13949 static void
13950 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13951 unsigned int interleave_factor,
13952 HOST_WIDE_INT bytes_per_iter)
13953 {
13954 rtx src_reg, dest_reg, final_src, test;
13955 HOST_WIDE_INT leftover;
13956
13957 leftover = length % bytes_per_iter;
13958 length -= leftover;
13959
13960 /* Create registers and memory references for use within the loop. */
13961 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13962 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13963
13964 /* Calculate the value that SRC_REG should have after the last iteration of
13965 the loop. */
13966 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13967 0, 0, OPTAB_WIDEN);
13968
13969 /* Emit the start of the loop. */
13970 rtx_code_label *label = gen_label_rtx ();
13971 emit_label (label);
13972
13973 /* Emit the loop body. */
13974 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13975 interleave_factor);
13976
13977 /* Move on to the next block. */
13978 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13979 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13980
13981 /* Emit the loop condition. */
13982 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13983 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
13984
13985 /* Mop up any left-over bytes. */
13986 if (leftover)
13987 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
13988 }
13989
13990 /* Emit a block move when either the source or destination is unaligned (not
13991 aligned to a four-byte boundary). This may need further tuning depending on
13992 core type, optimize_size setting, etc. */
13993
13994 static int
13995 arm_movmemqi_unaligned (rtx *operands)
13996 {
13997 HOST_WIDE_INT length = INTVAL (operands[2]);
13998
13999 if (optimize_size)
14000 {
14001 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14002 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14003 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14004 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14005 or dst_aligned though: allow more interleaving in those cases since the
14006 resulting code can be smaller. */
14007 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14008 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14009
14010 if (length > 12)
14011 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14012 interleave_factor, bytes_per_iter);
14013 else
14014 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14015 interleave_factor);
14016 }
14017 else
14018 {
14019 /* Note that the loop created by arm_block_move_unaligned_loop may be
14020 subject to loop unrolling, which makes tuning this condition a little
14021 redundant. */
14022 if (length > 32)
14023 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14024 else
14025 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14026 }
14027
14028 return 1;
14029 }
14030
14031 int
14032 arm_gen_movmemqi (rtx *operands)
14033 {
14034 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14035 HOST_WIDE_INT srcoffset, dstoffset;
14036 int i;
14037 rtx src, dst, srcbase, dstbase;
14038 rtx part_bytes_reg = NULL;
14039 rtx mem;
14040
14041 if (!CONST_INT_P (operands[2])
14042 || !CONST_INT_P (operands[3])
14043 || INTVAL (operands[2]) > 64)
14044 return 0;
14045
14046 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14047 return arm_movmemqi_unaligned (operands);
14048
14049 if (INTVAL (operands[3]) & 3)
14050 return 0;
14051
14052 dstbase = operands[0];
14053 srcbase = operands[1];
14054
14055 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14056 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14057
14058 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14059 out_words_to_go = INTVAL (operands[2]) / 4;
14060 last_bytes = INTVAL (operands[2]) & 3;
14061 dstoffset = srcoffset = 0;
14062
14063 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14064 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14065
14066 for (i = 0; in_words_to_go >= 2; i+=4)
14067 {
14068 if (in_words_to_go > 4)
14069 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14070 TRUE, srcbase, &srcoffset));
14071 else
14072 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14073 src, FALSE, srcbase,
14074 &srcoffset));
14075
14076 if (out_words_to_go)
14077 {
14078 if (out_words_to_go > 4)
14079 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14080 TRUE, dstbase, &dstoffset));
14081 else if (out_words_to_go != 1)
14082 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14083 out_words_to_go, dst,
14084 (last_bytes == 0
14085 ? FALSE : TRUE),
14086 dstbase, &dstoffset));
14087 else
14088 {
14089 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14090 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14091 if (last_bytes != 0)
14092 {
14093 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14094 dstoffset += 4;
14095 }
14096 }
14097 }
14098
14099 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14100 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14101 }
14102
14103 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14104 if (out_words_to_go)
14105 {
14106 rtx sreg;
14107
14108 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14109 sreg = copy_to_reg (mem);
14110
14111 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14112 emit_move_insn (mem, sreg);
14113 in_words_to_go--;
14114
14115 gcc_assert (!in_words_to_go); /* Sanity check.  */
14116 }
14117
14118 if (in_words_to_go)
14119 {
14120 gcc_assert (in_words_to_go > 0);
14121
14122 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14123 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14124 }
14125
14126 gcc_assert (!last_bytes || part_bytes_reg);
14127
14128 if (BYTES_BIG_ENDIAN && last_bytes)
14129 {
14130 rtx tmp = gen_reg_rtx (SImode);
14131
14132 /* The bytes we want are in the top end of the word. */
14133 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14134 GEN_INT (8 * (4 - last_bytes))));
14135 part_bytes_reg = tmp;
14136
14137 while (last_bytes)
14138 {
14139 mem = adjust_automodify_address (dstbase, QImode,
14140 plus_constant (Pmode, dst,
14141 last_bytes - 1),
14142 dstoffset + last_bytes - 1);
14143 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14144
14145 if (--last_bytes)
14146 {
14147 tmp = gen_reg_rtx (SImode);
14148 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14149 part_bytes_reg = tmp;
14150 }
14151 }
14152
14153 }
14154 else
14155 {
14156 if (last_bytes > 1)
14157 {
14158 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14159 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14160 last_bytes -= 2;
14161 if (last_bytes)
14162 {
14163 rtx tmp = gen_reg_rtx (SImode);
14164 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14165 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14166 part_bytes_reg = tmp;
14167 dstoffset += 2;
14168 }
14169 }
14170
14171 if (last_bytes)
14172 {
14173 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14174 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14175 }
14176 }
14177
14178 return 1;
14179 }
14180
14181 /* Helper for gen_movmem_ldrd_strd. Return a memory reference like MEM but
14182 with its address advanced by the size of MEM's mode. */
14183 inline static rtx
14184 next_consecutive_mem (rtx mem)
14185 {
14186 machine_mode mode = GET_MODE (mem);
14187 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14188 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14189
14190 return adjust_automodify_address (mem, mode, addr, offset);
14191 }
14192
14193 /* Copy using LDRD/STRD instructions whenever possible.
14194 Returns true upon success. */
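/* For instance, a 14-byte copy between word-aligned buffers is emitted as one
   DImode transfer (normally an LDRD/STRD pair), one SImode word copy and one
   halfword copy.  */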
14195 bool
14196 gen_movmem_ldrd_strd (rtx *operands)
14197 {
14198 unsigned HOST_WIDE_INT len;
14199 HOST_WIDE_INT align;
14200 rtx src, dst, base;
14201 rtx reg0;
14202 bool src_aligned, dst_aligned;
14203 bool src_volatile, dst_volatile;
14204
14205 gcc_assert (CONST_INT_P (operands[2]));
14206 gcc_assert (CONST_INT_P (operands[3]));
14207
14208 len = UINTVAL (operands[2]);
14209 if (len > 64)
14210 return false;
14211
14212 /* Maximum alignment we can assume for both src and dst buffers. */
14213 align = INTVAL (operands[3]);
14214
14215 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14216 return false;
14217
14218 /* Place src and dst addresses in registers
14219 and update the corresponding mem rtx. */
14220 dst = operands[0];
14221 dst_volatile = MEM_VOLATILE_P (dst);
14222 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14223 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14224 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14225
14226 src = operands[1];
14227 src_volatile = MEM_VOLATILE_P (src);
14228 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14229 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14230 src = adjust_automodify_address (src, VOIDmode, base, 0);
14231
14232 if (!unaligned_access && !(src_aligned && dst_aligned))
14233 return false;
14234
14235 if (src_volatile || dst_volatile)
14236 return false;
14237
14238 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14239 if (!(dst_aligned || src_aligned))
14240 return arm_gen_movmemqi (operands);
14241
14242 /* If either src or dst is unaligned, we'll access it as pairs
14243 of unaligned SImode accesses. Otherwise we can generate DImode
14244 ldrd/strd instructions. */
14245 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14246 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14247
14248 while (len >= 8)
14249 {
14250 len -= 8;
14251 reg0 = gen_reg_rtx (DImode);
14252 rtx low_reg = NULL_RTX;
14253 rtx hi_reg = NULL_RTX;
14254
14255 if (!src_aligned || !dst_aligned)
14256 {
14257 low_reg = gen_lowpart (SImode, reg0);
14258 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14259 }
14260 if (src_aligned)
14261 emit_move_insn (reg0, src);
14262 else
14263 {
14264 emit_insn (gen_unaligned_loadsi (low_reg, src));
14265 src = next_consecutive_mem (src);
14266 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14267 }
14268
14269 if (dst_aligned)
14270 emit_move_insn (dst, reg0);
14271 else
14272 {
14273 emit_insn (gen_unaligned_storesi (dst, low_reg));
14274 dst = next_consecutive_mem (dst);
14275 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14276 }
14277
14278 src = next_consecutive_mem (src);
14279 dst = next_consecutive_mem (dst);
14280 }
14281
14282 gcc_assert (len < 8);
14283 if (len >= 4)
14284 {
14285 /* More than a word but less than a double-word to copy. Copy a word. */
14286 reg0 = gen_reg_rtx (SImode);
14287 src = adjust_address (src, SImode, 0);
14288 dst = adjust_address (dst, SImode, 0);
14289 if (src_aligned)
14290 emit_move_insn (reg0, src);
14291 else
14292 emit_insn (gen_unaligned_loadsi (reg0, src));
14293
14294 if (dst_aligned)
14295 emit_move_insn (dst, reg0);
14296 else
14297 emit_insn (gen_unaligned_storesi (dst, reg0));
14298
14299 src = next_consecutive_mem (src);
14300 dst = next_consecutive_mem (dst);
14301 len -= 4;
14302 }
14303
14304 if (len == 0)
14305 return true;
14306
14307 /* Copy the remaining bytes. */
14308 if (len >= 2)
14309 {
14310 dst = adjust_address (dst, HImode, 0);
14311 src = adjust_address (src, HImode, 0);
14312 reg0 = gen_reg_rtx (SImode);
14313 if (src_aligned)
14314 emit_insn (gen_zero_extendhisi2 (reg0, src));
14315 else
14316 emit_insn (gen_unaligned_loadhiu (reg0, src));
14317
14318 if (dst_aligned)
14319 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14320 else
14321 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14322
14323 src = next_consecutive_mem (src);
14324 dst = next_consecutive_mem (dst);
14325 if (len == 2)
14326 return true;
14327 }
14328
14329 dst = adjust_address (dst, QImode, 0);
14330 src = adjust_address (src, QImode, 0);
14331 reg0 = gen_reg_rtx (QImode);
14332 emit_move_insn (reg0, src);
14333 emit_move_insn (dst, reg0);
14334 return true;
14335 }
14336
14337 /* Select a dominance comparison mode if possible for a test of the general
14338 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14339 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14340 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14341 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14342 In all cases OP will be either EQ or NE, but we don't need to know which
14343 here. If we are unable to support a dominance comparison we return
14344 CC mode. This will then fail to match for the RTL expressions that
14345 generate this call. */
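/* An illustrative reading of the switch below: with COND_OR == DOM_CC_X_OR_Y,
   an EQ comparison X and an LE comparison Y (EQ implies LE) collapse to
   CC_DLEmode; with COND_OR == DOM_CC_X_AND_Y and both comparisons LT, the
   result is CC_DLTmode.  */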
14346 machine_mode
14347 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14348 {
14349 enum rtx_code cond1, cond2;
14350 int swapped = 0;
14351
14352 /* Currently we will probably get the wrong result if the individual
14353 comparisons are not simple. This also ensures that it is safe to
14354 reverse a comparison if necessary. */
14355 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14356 != CCmode)
14357 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14358 != CCmode))
14359 return CCmode;
14360
14361 /* The if_then_else variant of this tests the second condition if the
14362 first passes, but is true if the first fails. Reverse the first
14363 condition to get a true "inclusive-or" expression. */
14364 if (cond_or == DOM_CC_NX_OR_Y)
14365 cond1 = reverse_condition (cond1);
14366
14367 /* If the comparisons are not equal, and one doesn't dominate the other,
14368 then we can't do this. */
14369 if (cond1 != cond2
14370 && !comparison_dominates_p (cond1, cond2)
14371 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14372 return CCmode;
14373
14374 if (swapped)
14375 std::swap (cond1, cond2);
14376
14377 switch (cond1)
14378 {
14379 case EQ:
14380 if (cond_or == DOM_CC_X_AND_Y)
14381 return CC_DEQmode;
14382
14383 switch (cond2)
14384 {
14385 case EQ: return CC_DEQmode;
14386 case LE: return CC_DLEmode;
14387 case LEU: return CC_DLEUmode;
14388 case GE: return CC_DGEmode;
14389 case GEU: return CC_DGEUmode;
14390 default: gcc_unreachable ();
14391 }
14392
14393 case LT:
14394 if (cond_or == DOM_CC_X_AND_Y)
14395 return CC_DLTmode;
14396
14397 switch (cond2)
14398 {
14399 case LT:
14400 return CC_DLTmode;
14401 case LE:
14402 return CC_DLEmode;
14403 case NE:
14404 return CC_DNEmode;
14405 default:
14406 gcc_unreachable ();
14407 }
14408
14409 case GT:
14410 if (cond_or == DOM_CC_X_AND_Y)
14411 return CC_DGTmode;
14412
14413 switch (cond2)
14414 {
14415 case GT:
14416 return CC_DGTmode;
14417 case GE:
14418 return CC_DGEmode;
14419 case NE:
14420 return CC_DNEmode;
14421 default:
14422 gcc_unreachable ();
14423 }
14424
14425 case LTU:
14426 if (cond_or == DOM_CC_X_AND_Y)
14427 return CC_DLTUmode;
14428
14429 switch (cond2)
14430 {
14431 case LTU:
14432 return CC_DLTUmode;
14433 case LEU:
14434 return CC_DLEUmode;
14435 case NE:
14436 return CC_DNEmode;
14437 default:
14438 gcc_unreachable ();
14439 }
14440
14441 case GTU:
14442 if (cond_or == DOM_CC_X_AND_Y)
14443 return CC_DGTUmode;
14444
14445 switch (cond2)
14446 {
14447 case GTU:
14448 return CC_DGTUmode;
14449 case GEU:
14450 return CC_DGEUmode;
14451 case NE:
14452 return CC_DNEmode;
14453 default:
14454 gcc_unreachable ();
14455 }
14456
14457 /* The remaining cases only occur when both comparisons are the
14458 same. */
14459 case NE:
14460 gcc_assert (cond1 == cond2);
14461 return CC_DNEmode;
14462
14463 case LE:
14464 gcc_assert (cond1 == cond2);
14465 return CC_DLEmode;
14466
14467 case GE:
14468 gcc_assert (cond1 == cond2);
14469 return CC_DGEmode;
14470
14471 case LEU:
14472 gcc_assert (cond1 == cond2);
14473 return CC_DLEUmode;
14474
14475 case GEU:
14476 gcc_assert (cond1 == cond2);
14477 return CC_DGEUmode;
14478
14479 default:
14480 gcc_unreachable ();
14481 }
14482 }
14483
14484 machine_mode
14485 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14486 {
14487 /* All floating point compares return CCFP if it is an equality
14488 comparison, and CCFPE otherwise. */
14489 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14490 {
14491 switch (op)
14492 {
14493 case EQ:
14494 case NE:
14495 case UNORDERED:
14496 case ORDERED:
14497 case UNLT:
14498 case UNLE:
14499 case UNGT:
14500 case UNGE:
14501 case UNEQ:
14502 case LTGT:
14503 return CCFPmode;
14504
14505 case LT:
14506 case LE:
14507 case GT:
14508 case GE:
14509 return CCFPEmode;
14510
14511 default:
14512 gcc_unreachable ();
14513 }
14514 }
14515
14516 /* A compare with a shifted operand. Because of canonicalization, the
14517 comparison will have to be swapped when we emit the assembler. */
14518 if (GET_MODE (y) == SImode
14519 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14520 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14521 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14522 || GET_CODE (x) == ROTATERT))
14523 return CC_SWPmode;
14524
14525 /* This operation is performed swapped, but since we only rely on the Z
14526 flag we don't need an additional mode. */
14527 if (GET_MODE (y) == SImode
14528 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14529 && GET_CODE (x) == NEG
14530 && (op == EQ || op == NE))
14531 return CC_Zmode;
14532
14533 /* This is a special case that is used by combine to allow a
14534 comparison of a shifted byte load to be split into a zero-extend
14535 followed by a comparison of the shifted integer (only valid for
14536 equalities and unsigned inequalities). */
14537 if (GET_MODE (x) == SImode
14538 && GET_CODE (x) == ASHIFT
14539 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14540 && GET_CODE (XEXP (x, 0)) == SUBREG
14541 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14542 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14543 && (op == EQ || op == NE
14544 || op == GEU || op == GTU || op == LTU || op == LEU)
14545 && CONST_INT_P (y))
14546 return CC_Zmode;
14547
14548   /* A construct for a conditional compare: if the false arm contains
14549      0, then both conditions must be true; otherwise either condition
14550 must be true. Not all conditions are possible, so CCmode is
14551 returned if it can't be done. */
14552 if (GET_CODE (x) == IF_THEN_ELSE
14553 && (XEXP (x, 2) == const0_rtx
14554 || XEXP (x, 2) == const1_rtx)
14555 && COMPARISON_P (XEXP (x, 0))
14556 && COMPARISON_P (XEXP (x, 1)))
14557 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14558 INTVAL (XEXP (x, 2)));
14559
14560 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14561 if (GET_CODE (x) == AND
14562 && (op == EQ || op == NE)
14563 && COMPARISON_P (XEXP (x, 0))
14564 && COMPARISON_P (XEXP (x, 1)))
14565 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14566 DOM_CC_X_AND_Y);
14567
14568 if (GET_CODE (x) == IOR
14569 && (op == EQ || op == NE)
14570 && COMPARISON_P (XEXP (x, 0))
14571 && COMPARISON_P (XEXP (x, 1)))
14572 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14573 DOM_CC_X_OR_Y);
14574
14575 /* An operation (on Thumb) where we want to test for a single bit.
14576 This is done by shifting that bit up into the top bit of a
14577 scratch register; we can then branch on the sign bit. */
14578 if (TARGET_THUMB1
14579 && GET_MODE (x) == SImode
14580 && (op == EQ || op == NE)
14581 && GET_CODE (x) == ZERO_EXTRACT
14582 && XEXP (x, 1) == const1_rtx)
14583 return CC_Nmode;
14584
14585   /* For an operation that sets the condition codes as a side-effect, the
14586 V flag is not set correctly, so we can only use comparisons where
14587 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14588 instead.) */
14589 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14590 if (GET_MODE (x) == SImode
14591 && y == const0_rtx
14592 && (op == EQ || op == NE || op == LT || op == GE)
14593 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14594 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14595 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14596 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14597 || GET_CODE (x) == LSHIFTRT
14598 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14599 || GET_CODE (x) == ROTATERT
14600 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14601 return CC_NOOVmode;
14602
14603 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14604 return CC_Zmode;
14605
14606 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14607 && GET_CODE (x) == PLUS
14608 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14609 return CC_Cmode;
14610
14611 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14612 {
14613 switch (op)
14614 {
14615 case EQ:
14616 case NE:
14617 /* A DImode comparison against zero can be implemented by
14618 or'ing the two halves together. */
14619 if (y == const0_rtx)
14620 return CC_Zmode;
14621
14622 /* We can do an equality test in three Thumb instructions. */
14623 if (!TARGET_32BIT)
14624 return CC_Zmode;
14625
14626 /* FALLTHROUGH */
14627
14628 case LTU:
14629 case LEU:
14630 case GTU:
14631 case GEU:
14632 /* DImode unsigned comparisons can be implemented by cmp +
14633 cmpeq without a scratch register. Not worth doing in
14634 Thumb-2. */
14635 if (TARGET_32BIT)
14636 return CC_CZmode;
14637
14638 /* FALLTHROUGH */
14639
14640 case LT:
14641 case LE:
14642 case GT:
14643 case GE:
14644 /* DImode signed and unsigned comparisons can be implemented
14645 by cmp + sbcs with a scratch register, but that does not
14646 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14647 gcc_assert (op != EQ && op != NE);
14648 return CC_NCVmode;
14649
14650 default:
14651 gcc_unreachable ();
14652 }
14653 }
14654
14655 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14656 return GET_MODE (x);
14657
14658 return CCmode;
14659 }
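/* For example, the selection above maps an unsigned carry test such as
   (ltu (plus r0 r1) r0) onto CC_Cmode, while a floating-point equality
   comparison gets CCFPmode and an ordered floating-point inequality
   gets CCFPEmode.  */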
14660
14661 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14662    return the rtx for the CC register in the proper mode.  SCRATCH is an SImode
14663    scratch register, required by some DImode comparisons after reload.  */
14664 rtx
14665 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14666 {
14667 machine_mode mode;
14668 rtx cc_reg;
14669 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14670
14671 /* We might have X as a constant, Y as a register because of the predicates
14672 used for cmpdi. If so, force X to a register here. */
14673 if (dimode_comparison && !REG_P (x))
14674 x = force_reg (DImode, x);
14675
14676 mode = SELECT_CC_MODE (code, x, y);
14677 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14678
14679 if (dimode_comparison
14680 && mode != CC_CZmode)
14681 {
14682 rtx clobber, set;
14683
14684 /* To compare two non-zero values for equality, XOR them and
14685 then compare against zero. Not used for ARM mode; there
14686 CC_CZmode is cheaper. */
14687 if (mode == CC_Zmode && y != const0_rtx)
14688 {
14689 gcc_assert (!reload_completed);
14690 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14691 y = const0_rtx;
14692 }
14693
14694 /* A scratch register is required. */
14695 if (reload_completed)
14696 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14697 else
14698 scratch = gen_rtx_SCRATCH (SImode);
14699
14700 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14701 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14702 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14703 }
14704 else
14705 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14706
14707 return cc_reg;
14708 }
14709
14710 /* Generate a sequence of insns that will generate the correct return
14711 address mask depending on the physical architecture that the program
14712 is running on. */
14713 rtx
14714 arm_gen_return_addr_mask (void)
14715 {
14716 rtx reg = gen_reg_rtx (Pmode);
14717
14718 emit_insn (gen_return_addr_mask (reg));
14719 return reg;
14720 }
14721
14722 void
14723 arm_reload_in_hi (rtx *operands)
14724 {
14725 rtx ref = operands[1];
14726 rtx base, scratch;
14727 HOST_WIDE_INT offset = 0;
14728
14729 if (GET_CODE (ref) == SUBREG)
14730 {
14731 offset = SUBREG_BYTE (ref);
14732 ref = SUBREG_REG (ref);
14733 }
14734
14735 if (REG_P (ref))
14736 {
14737 /* We have a pseudo which has been spilt onto the stack; there
14738 are two cases here: the first where there is a simple
14739 stack-slot replacement and a second where the stack-slot is
14740 out of range, or is used as a subreg. */
14741 if (reg_equiv_mem (REGNO (ref)))
14742 {
14743 ref = reg_equiv_mem (REGNO (ref));
14744 base = find_replacement (&XEXP (ref, 0));
14745 }
14746 else
14747 /* The slot is out of range, or was dressed up in a SUBREG. */
14748 base = reg_equiv_address (REGNO (ref));
14749
14750 /* PR 62554: If there is no equivalent memory location then just move
14751 the value as an SImode register move. This happens when the target
14752 architecture variant does not have an HImode register move. */
14753 if (base == NULL)
14754 {
14755 gcc_assert (REG_P (operands[0]));
14756 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14757 gen_rtx_SUBREG (SImode, ref, 0)));
14758 return;
14759 }
14760 }
14761 else
14762 base = find_replacement (&XEXP (ref, 0));
14763
14764 /* Handle the case where the address is too complex to be offset by 1. */
14765 if (GET_CODE (base) == MINUS
14766 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14767 {
14768 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14769
14770 emit_set_insn (base_plus, base);
14771 base = base_plus;
14772 }
14773 else if (GET_CODE (base) == PLUS)
14774 {
14775 /* The addend must be CONST_INT, or we would have dealt with it above. */
14776 HOST_WIDE_INT hi, lo;
14777
14778 offset += INTVAL (XEXP (base, 1));
14779 base = XEXP (base, 0);
14780
14781 /* Rework the address into a legal sequence of insns. */
14782 /* Valid range for lo is -4095 -> 4095 */
14783 lo = (offset >= 0
14784 ? (offset & 0xfff)
14785 : -((-offset) & 0xfff));
14786
14787 /* Corner case, if lo is the max offset then we would be out of range
14788 once we have added the additional 1 below, so bump the msb into the
14789 pre-loading insn(s). */
14790 if (lo == 4095)
14791 lo &= 0x7ff;
14792
14793 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14794 ^ (HOST_WIDE_INT) 0x80000000)
14795 - (HOST_WIDE_INT) 0x80000000);
14796
14797 gcc_assert (hi + lo == offset);
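      /* A worked example of the split above: for offset = 5000 (0x1388),
	 lo = 0x388 = 904 and hi = 4096, so 4096 is folded into BASE_PLUS by
	 the addsi3 below and the two byte loads use offsets 904 and 905,
	 both within the +/-4095 range of an LDRB.  */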
14798
14799 if (hi != 0)
14800 {
14801 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14802
14803 /* Get the base address; addsi3 knows how to handle constants
14804 that require more than one insn. */
14805 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14806 base = base_plus;
14807 offset = lo;
14808 }
14809 }
14810
14811 /* Operands[2] may overlap operands[0] (though it won't overlap
14812      operands[1]); that's why we asked for a DImode reg -- so we can
14813 use the bit that does not overlap. */
14814 if (REGNO (operands[2]) == REGNO (operands[0]))
14815 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14816 else
14817 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14818
14819 emit_insn (gen_zero_extendqisi2 (scratch,
14820 gen_rtx_MEM (QImode,
14821 plus_constant (Pmode, base,
14822 offset))));
14823 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14824 gen_rtx_MEM (QImode,
14825 plus_constant (Pmode, base,
14826 offset + 1))));
14827 if (!BYTES_BIG_ENDIAN)
14828 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14829 gen_rtx_IOR (SImode,
14830 gen_rtx_ASHIFT
14831 (SImode,
14832 gen_rtx_SUBREG (SImode, operands[0], 0),
14833 GEN_INT (8)),
14834 scratch));
14835 else
14836 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14837 gen_rtx_IOR (SImode,
14838 gen_rtx_ASHIFT (SImode, scratch,
14839 GEN_INT (8)),
14840 gen_rtx_SUBREG (SImode, operands[0], 0)));
14841 }
14842
14843 /* Handle storing a half-word to memory during reload by synthesizing it as two
14844 byte stores. Take care not to clobber the input values until after we
14845 have moved them somewhere safe. This code assumes that if the DImode
14846 scratch in operands[2] overlaps either the input value or output address
14847 in some way, then that value must die in this insn (we absolutely need
14848 two scratch registers for some corner cases). */
14849 void
14850 arm_reload_out_hi (rtx *operands)
14851 {
14852 rtx ref = operands[0];
14853 rtx outval = operands[1];
14854 rtx base, scratch;
14855 HOST_WIDE_INT offset = 0;
14856
14857 if (GET_CODE (ref) == SUBREG)
14858 {
14859 offset = SUBREG_BYTE (ref);
14860 ref = SUBREG_REG (ref);
14861 }
14862
14863 if (REG_P (ref))
14864 {
14865 /* We have a pseudo which has been spilt onto the stack; there
14866 are two cases here: the first where there is a simple
14867 stack-slot replacement and a second where the stack-slot is
14868 out of range, or is used as a subreg. */
14869 if (reg_equiv_mem (REGNO (ref)))
14870 {
14871 ref = reg_equiv_mem (REGNO (ref));
14872 base = find_replacement (&XEXP (ref, 0));
14873 }
14874 else
14875 /* The slot is out of range, or was dressed up in a SUBREG. */
14876 base = reg_equiv_address (REGNO (ref));
14877
14878 /* PR 62254: If there is no equivalent memory location then just move
14879 the value as an SImode register move. This happens when the target
14880 architecture variant does not have an HImode register move. */
14881 if (base == NULL)
14882 {
14883 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14884
14885 if (REG_P (outval))
14886 {
14887 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14888 gen_rtx_SUBREG (SImode, outval, 0)));
14889 }
14890 else /* SUBREG_P (outval) */
14891 {
14892 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14893 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14894 SUBREG_REG (outval)));
14895 else
14896 /* FIXME: Handle other cases ? */
14897 gcc_unreachable ();
14898 }
14899 return;
14900 }
14901 }
14902 else
14903 base = find_replacement (&XEXP (ref, 0));
14904
14905 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14906
14907 /* Handle the case where the address is too complex to be offset by 1. */
14908 if (GET_CODE (base) == MINUS
14909 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14910 {
14911 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14912
14913 /* Be careful not to destroy OUTVAL. */
14914 if (reg_overlap_mentioned_p (base_plus, outval))
14915 {
14916 /* Updating base_plus might destroy outval, see if we can
14917 swap the scratch and base_plus. */
14918 if (!reg_overlap_mentioned_p (scratch, outval))
14919 std::swap (scratch, base_plus);
14920 else
14921 {
14922 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14923
14924 /* Be conservative and copy OUTVAL into the scratch now,
14925 this should only be necessary if outval is a subreg
14926 of something larger than a word. */
14927 /* XXX Might this clobber base? I can't see how it can,
14928 since scratch is known to overlap with OUTVAL, and
14929 must be wider than a word. */
14930 emit_insn (gen_movhi (scratch_hi, outval));
14931 outval = scratch_hi;
14932 }
14933 }
14934
14935 emit_set_insn (base_plus, base);
14936 base = base_plus;
14937 }
14938 else if (GET_CODE (base) == PLUS)
14939 {
14940 /* The addend must be CONST_INT, or we would have dealt with it above. */
14941 HOST_WIDE_INT hi, lo;
14942
14943 offset += INTVAL (XEXP (base, 1));
14944 base = XEXP (base, 0);
14945
14946 /* Rework the address into a legal sequence of insns. */
14947 /* Valid range for lo is -4095 -> 4095 */
14948 lo = (offset >= 0
14949 ? (offset & 0xfff)
14950 : -((-offset) & 0xfff));
14951
14952 /* Corner case, if lo is the max offset then we would be out of range
14953 once we have added the additional 1 below, so bump the msb into the
14954 pre-loading insn(s). */
14955 if (lo == 4095)
14956 lo &= 0x7ff;
14957
14958 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14959 ^ (HOST_WIDE_INT) 0x80000000)
14960 - (HOST_WIDE_INT) 0x80000000);
14961
14962 gcc_assert (hi + lo == offset);
14963
14964 if (hi != 0)
14965 {
14966 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14967
14968 /* Be careful not to destroy OUTVAL. */
14969 if (reg_overlap_mentioned_p (base_plus, outval))
14970 {
14971 /* Updating base_plus might destroy outval, see if we
14972 can swap the scratch and base_plus. */
14973 if (!reg_overlap_mentioned_p (scratch, outval))
14974 std::swap (scratch, base_plus);
14975 else
14976 {
14977 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14978
14979 /* Be conservative and copy outval into scratch now,
14980 this should only be necessary if outval is a
14981 subreg of something larger than a word. */
14982 /* XXX Might this clobber base? I can't see how it
14983 can, since scratch is known to overlap with
14984 outval. */
14985 emit_insn (gen_movhi (scratch_hi, outval));
14986 outval = scratch_hi;
14987 }
14988 }
14989
14990 /* Get the base address; addsi3 knows how to handle constants
14991 that require more than one insn. */
14992 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14993 base = base_plus;
14994 offset = lo;
14995 }
14996 }
14997
14998 if (BYTES_BIG_ENDIAN)
14999 {
15000 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15001 plus_constant (Pmode, base,
15002 offset + 1)),
15003 gen_lowpart (QImode, outval)));
15004 emit_insn (gen_lshrsi3 (scratch,
15005 gen_rtx_SUBREG (SImode, outval, 0),
15006 GEN_INT (8)));
15007 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15008 offset)),
15009 gen_lowpart (QImode, scratch)));
15010 }
15011 else
15012 {
15013 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15014 offset)),
15015 gen_lowpart (QImode, outval)));
15016 emit_insn (gen_lshrsi3 (scratch,
15017 gen_rtx_SUBREG (SImode, outval, 0),
15018 GEN_INT (8)));
15019 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15020 plus_constant (Pmode, base,
15021 offset + 1)),
15022 gen_lowpart (QImode, scratch)));
15023 }
15024 }
15025
15026 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15027 (padded to the size of a word) should be passed in a register. */
15028
15029 static bool
15030 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15031 {
15032 if (TARGET_AAPCS_BASED)
15033 return must_pass_in_stack_var_size (mode, type);
15034 else
15035 return must_pass_in_stack_var_size_or_pad (mode, type);
15036 }
15037
15038
15039 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15040 Return true if an argument passed on the stack should be padded upwards,
15041 i.e. if the least-significant byte has useful data.
15042 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15043 aggregate types are placed in the lowest memory address. */
15044
15045 bool
15046 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15047 {
15048 if (!TARGET_AAPCS_BASED)
15049 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15050
15051 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15052 return false;
15053
15054 return true;
15055 }
15056
15057
15058 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15059 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15060 register has useful data, and return the opposite if the most
15061 significant byte does. */
15062
15063 bool
15064 arm_pad_reg_upward (machine_mode mode,
15065 tree type, int first ATTRIBUTE_UNUSED)
15066 {
15067 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15068 {
15069 /* For AAPCS, small aggregates, small fixed-point types,
15070 and small complex types are always padded upwards. */
15071 if (type)
15072 {
15073 if ((AGGREGATE_TYPE_P (type)
15074 || TREE_CODE (type) == COMPLEX_TYPE
15075 || FIXED_POINT_TYPE_P (type))
15076 && int_size_in_bytes (type) <= 4)
15077 return true;
15078 }
15079 else
15080 {
15081 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15082 && GET_MODE_SIZE (mode) <= 4)
15083 return true;
15084 }
15085 }
15086
15087 /* Otherwise, use default padding. */
15088 return !BYTES_BIG_ENDIAN;
15089 }
15090
15091 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15092 assuming that the address in the base register is word aligned. */
15093 bool
15094 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15095 {
15096 HOST_WIDE_INT max_offset;
15097
15098   /* Offset must be a multiple of 4 in Thumb-2 mode.  */
15099 if (TARGET_THUMB2 && ((offset & 3) != 0))
15100 return false;
15101
15102 if (TARGET_THUMB2)
15103 max_offset = 1020;
15104 else if (TARGET_ARM)
15105 max_offset = 255;
15106 else
15107 return false;
15108
15109 return ((offset <= max_offset) && (offset >= -max_offset));
15110 }
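/* For example, under the rules above an offset of 1020 is accepted in
   Thumb-2 (a multiple of 4 within +/-1020) but rejected in ARM state,
   where the immediate range for LDRD/STRD is only +/-255.  */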
15111
15112 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15113 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15114 Assumes that the address in the base register RN is word aligned. Pattern
15115 guarantees that both memory accesses use the same base register,
15116 the offsets are constants within the range, and the gap between the offsets is 4.
15117    If reload is complete, check that the registers are legal.  WBACK indicates whether
15118 address is updated. LOAD indicates whether memory access is load or store. */
15119 bool
15120 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15121 bool wback, bool load)
15122 {
15123 unsigned int t, t2, n;
15124
15125 if (!reload_completed)
15126 return true;
15127
15128 if (!offset_ok_for_ldrd_strd (offset))
15129 return false;
15130
15131 t = REGNO (rt);
15132 t2 = REGNO (rt2);
15133 n = REGNO (rn);
15134
15135 if ((TARGET_THUMB2)
15136 && ((wback && (n == t || n == t2))
15137 || (t == SP_REGNUM)
15138 || (t == PC_REGNUM)
15139 || (t2 == SP_REGNUM)
15140 || (t2 == PC_REGNUM)
15141 || (!load && (n == PC_REGNUM))
15142 || (load && (t == t2))
15143 /* Triggers Cortex-M3 LDRD errata. */
15144 || (!wback && load && fix_cm3_ldrd && (n == t))))
15145 return false;
15146
15147 if ((TARGET_ARM)
15148 && ((wback && (n == t || n == t2))
15149 || (t2 == PC_REGNUM)
15150 || (t % 2 != 0) /* First destination register is not even. */
15151 || (t2 != t + 1)
15152 /* PC can be used as base register (for offset addressing only),
15153           but it is deprecated.  */
15154 || (n == PC_REGNUM)))
15155 return false;
15156
15157 return true;
15158 }
15159
15160 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15161 operand MEM's address contains an immediate offset from the base
15162 register and has no side effects, in which case it sets BASE and
15163 OFFSET accordingly. */
15164 static bool
15165 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15166 {
15167 rtx addr;
15168
15169 gcc_assert (base != NULL && offset != NULL);
15170
15171 /* TODO: Handle more general memory operand patterns, such as
15172 PRE_DEC and PRE_INC. */
15173
15174 if (side_effects_p (mem))
15175 return false;
15176
15177 /* Can't deal with subregs. */
15178 if (GET_CODE (mem) == SUBREG)
15179 return false;
15180
15181 gcc_assert (MEM_P (mem));
15182
15183 *offset = const0_rtx;
15184
15185 addr = XEXP (mem, 0);
15186
15187 /* If addr isn't valid for DImode, then we can't handle it. */
15188 if (!arm_legitimate_address_p (DImode, addr,
15189 reload_in_progress || reload_completed))
15190 return false;
15191
15192 if (REG_P (addr))
15193 {
15194 *base = addr;
15195 return true;
15196 }
15197 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15198 {
15199 *base = XEXP (addr, 0);
15200 *offset = XEXP (addr, 1);
15201 return (REG_P (*base) && CONST_INT_P (*offset));
15202 }
15203
15204 return false;
15205 }
15206
15207 /* Called from a peephole2 to replace two word-size accesses with a
15208 single LDRD/STRD instruction. Returns true iff we can generate a
15209 new instruction sequence. That is, both accesses use the same base
15210 register and the gap between constant offsets is 4. This function
15211 may reorder its operands to match ldrd/strd RTL templates.
15212 OPERANDS are the operands found by the peephole matcher;
15213 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15214    corresponding memory operands.  LOAD indicates whether the access
15215 is load or store. CONST_STORE indicates a store of constant
15216 integer values held in OPERANDS[4,5] and assumes that the pattern
15217    is 4 insns long, for the purpose of checking dead registers.
15218 COMMUTE indicates that register operands may be reordered. */
15219 bool
15220 gen_operands_ldrd_strd (rtx *operands, bool load,
15221 bool const_store, bool commute)
15222 {
15223 int nops = 2;
15224 HOST_WIDE_INT offsets[2], offset;
15225 rtx base = NULL_RTX;
15226 rtx cur_base, cur_offset, tmp;
15227 int i, gap;
15228 HARD_REG_SET regset;
15229
15230 gcc_assert (!const_store || !load);
15231 /* Check that the memory references are immediate offsets from the
15232 same base register. Extract the base register, the destination
15233 registers, and the corresponding memory offsets. */
15234 for (i = 0; i < nops; i++)
15235 {
15236 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15237 return false;
15238
15239 if (i == 0)
15240 base = cur_base;
15241 else if (REGNO (base) != REGNO (cur_base))
15242 return false;
15243
15244 offsets[i] = INTVAL (cur_offset);
15245 if (GET_CODE (operands[i]) == SUBREG)
15246 {
15247 tmp = SUBREG_REG (operands[i]);
15248 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15249 operands[i] = tmp;
15250 }
15251 }
15252
15253 /* Make sure there is no dependency between the individual loads. */
15254 if (load && REGNO (operands[0]) == REGNO (base))
15255 return false; /* RAW */
15256
15257 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15258 return false; /* WAW */
15259
15260 /* If the same input register is used in both stores
15261 when storing different constants, try to find a free register.
15262 For example, the code
15263 mov r0, 0
15264 str r0, [r2]
15265 mov r0, 1
15266 str r0, [r2, #4]
15267 can be transformed into
15268 mov r1, 0
15269 mov r0, 1
15270 strd r1, r0, [r2]
15271 in Thumb mode assuming that r1 is free.
15272 For ARM mode do the same but only if the starting register
15273 can be made to be even. */
15274 if (const_store
15275 && REGNO (operands[0]) == REGNO (operands[1])
15276 && INTVAL (operands[4]) != INTVAL (operands[5]))
15277 {
15278 if (TARGET_THUMB2)
15279 {
15280 CLEAR_HARD_REG_SET (regset);
15281 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15282 if (tmp == NULL_RTX)
15283 return false;
15284
15285 /* Use the new register in the first load to ensure that
15286 if the original input register is not dead after peephole,
15287 then it will have the correct constant value. */
15288 operands[0] = tmp;
15289 }
15290 else if (TARGET_ARM)
15291 {
15292 int regno = REGNO (operands[0]);
15293 if (!peep2_reg_dead_p (4, operands[0]))
15294 {
15295 /* When the input register is even and is not dead after the
15296 pattern, it has to hold the second constant but we cannot
15297 form a legal STRD in ARM mode with this register as the second
15298 register. */
15299 if (regno % 2 == 0)
15300 return false;
15301
15302 /* Is regno-1 free? */
15303 SET_HARD_REG_SET (regset);
15304 CLEAR_HARD_REG_BIT(regset, regno - 1);
15305 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15306 if (tmp == NULL_RTX)
15307 return false;
15308
15309 operands[0] = tmp;
15310 }
15311 else
15312 {
15313 /* Find a DImode register. */
15314 CLEAR_HARD_REG_SET (regset);
15315 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15316 if (tmp != NULL_RTX)
15317 {
15318 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15319 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15320 }
15321 else
15322 {
15323 /* Can we use the input register to form a DI register? */
15324 SET_HARD_REG_SET (regset);
15325 CLEAR_HARD_REG_BIT(regset,
15326 regno % 2 == 0 ? regno + 1 : regno - 1);
15327 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15328 if (tmp == NULL_RTX)
15329 return false;
15330 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15331 }
15332 }
15333
15334 gcc_assert (operands[0] != NULL_RTX);
15335 gcc_assert (operands[1] != NULL_RTX);
15336 gcc_assert (REGNO (operands[0]) % 2 == 0);
15337 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15338 }
15339 }
15340
15341 /* Make sure the instructions are ordered with lower memory access first. */
15342 if (offsets[0] > offsets[1])
15343 {
15344 gap = offsets[0] - offsets[1];
15345 offset = offsets[1];
15346
15347 /* Swap the instructions such that lower memory is accessed first. */
15348 std::swap (operands[0], operands[1]);
15349 std::swap (operands[2], operands[3]);
15350 if (const_store)
15351 std::swap (operands[4], operands[5]);
15352 }
15353 else
15354 {
15355 gap = offsets[1] - offsets[0];
15356 offset = offsets[0];
15357 }
15358
15359 /* Make sure accesses are to consecutive memory locations. */
15360 if (gap != 4)
15361 return false;
15362
15363 /* Make sure we generate legal instructions. */
15364 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15365 false, load))
15366 return true;
15367
15368 /* In Thumb state, where registers are almost unconstrained, there
15369      is little hope of fixing it.  */
15370 if (TARGET_THUMB2)
15371 return false;
15372
15373 if (load && commute)
15374 {
15375 /* Try reordering registers. */
15376 std::swap (operands[0], operands[1]);
15377 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15378 false, load))
15379 return true;
15380 }
15381
15382 if (const_store)
15383 {
15384 /* If input registers are dead after this pattern, they can be
15385 reordered or replaced by other registers that are free in the
15386 current pattern. */
15387 if (!peep2_reg_dead_p (4, operands[0])
15388 || !peep2_reg_dead_p (4, operands[1]))
15389 return false;
15390
15391 /* Try to reorder the input registers. */
15392 /* For example, the code
15393 mov r0, 0
15394 mov r1, 1
15395 str r1, [r2]
15396 str r0, [r2, #4]
15397 can be transformed into
15398 mov r1, 0
15399 mov r0, 1
15400 strd r0, [r2]
15401 */
15402 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15403 false, false))
15404 {
15405 std::swap (operands[0], operands[1]);
15406 return true;
15407 }
15408
15409 /* Try to find a free DI register. */
15410 CLEAR_HARD_REG_SET (regset);
15411 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15412 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15413 while (true)
15414 {
15415 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15416 if (tmp == NULL_RTX)
15417 return false;
15418
15419 	  /* TMP must be an even-numbered register in DImode.
15420 Split it into SI registers. */
15421 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15422 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15423 gcc_assert (operands[0] != NULL_RTX);
15424 gcc_assert (operands[1] != NULL_RTX);
15425 gcc_assert (REGNO (operands[0]) % 2 == 0);
15426 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15427
15428 return (operands_ok_ldrd_strd (operands[0], operands[1],
15429 base, offset,
15430 false, load));
15431 }
15432 }
15433
15434 return false;
15435 }
15436
15437
15438
15439 \f
15440 /* Print a symbolic form of X to the debug file, F. */
15441 static void
15442 arm_print_value (FILE *f, rtx x)
15443 {
15444 switch (GET_CODE (x))
15445 {
15446 case CONST_INT:
15447 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15448 return;
15449
15450 case CONST_DOUBLE:
15451 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15452 return;
15453
15454 case CONST_VECTOR:
15455 {
15456 int i;
15457
15458 fprintf (f, "<");
15459 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15460 {
15461 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15462 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15463 fputc (',', f);
15464 }
15465 fprintf (f, ">");
15466 }
15467 return;
15468
15469 case CONST_STRING:
15470 fprintf (f, "\"%s\"", XSTR (x, 0));
15471 return;
15472
15473 case SYMBOL_REF:
15474 fprintf (f, "`%s'", XSTR (x, 0));
15475 return;
15476
15477 case LABEL_REF:
15478 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15479 return;
15480
15481 case CONST:
15482 arm_print_value (f, XEXP (x, 0));
15483 return;
15484
15485 case PLUS:
15486 arm_print_value (f, XEXP (x, 0));
15487 fprintf (f, "+");
15488 arm_print_value (f, XEXP (x, 1));
15489 return;
15490
15491 case PC:
15492 fprintf (f, "pc");
15493 return;
15494
15495 default:
15496 fprintf (f, "????");
15497 return;
15498 }
15499 }
15500 \f
15501 /* Routines for manipulation of the constant pool. */
15502
15503 /* Arm instructions cannot load a large constant directly into a
15504 register; they have to come from a pc relative load. The constant
15505 must therefore be placed in the addressable range of the pc
15506 relative load. Depending on the precise pc relative load
15507 instruction the range is somewhere between 256 bytes and 4k. This
15508 means that we often have to dump a constant inside a function, and
15509 generate code to branch around it.
15510
15511 It is important to minimize this, since the branches will slow
15512 things down and make the code larger.
15513
15514 Normally we can hide the table after an existing unconditional
15515 branch so that there is no interruption of the flow, but in the
15516 worst case the code looks like this:
15517
15518 ldr rn, L1
15519 ...
15520 b L2
15521 align
15522 L1: .long value
15523 L2:
15524 ...
15525
15526 ldr rn, L3
15527 ...
15528 b L4
15529 align
15530 L3: .long value
15531 L4:
15532 ...
15533
15534 We fix this by performing a scan after scheduling, which notices
15535 which instructions need to have their operands fetched from the
15536 constant table and builds the table.
15537
15538 The algorithm starts by building a table of all the constants that
15539 need fixing up and all the natural barriers in the function (places
15540 where a constant table can be dropped without breaking the flow).
15541 For each fixup we note how far the pc-relative replacement will be
15542 able to reach and the offset of the instruction into the function.
15543
15544 Having built the table we then group the fixes together to form
15545 tables that are as large as possible (subject to addressing
15546 constraints) and emit each table of constants after the last
15547 barrier that is within range of all the instructions in the group.
15548 If a group does not contain a barrier, then we forcibly create one
15549 by inserting a jump instruction into the flow. Once the table has
15550 been inserted, the insns are then modified to reference the
15551 relevant entry in the pool.
15552
15553 Possible enhancements to the algorithm (not implemented) are:
15554
15555 1) For some processors and object formats, there may be benefit in
15556 aligning the pools to the start of cache lines; this alignment
15557 would need to be taken into account when calculating addressability
15558 of a pool. */
15559
15560 /* These typedefs are located at the start of this file, so that
15561 they can be used in the prototypes there. This comment is to
15562 remind readers of that fact so that the following structures
15563 can be understood more easily.
15564
15565 typedef struct minipool_node Mnode;
15566 typedef struct minipool_fixup Mfix; */
15567
15568 struct minipool_node
15569 {
15570 /* Doubly linked chain of entries. */
15571 Mnode * next;
15572 Mnode * prev;
15573   /* The maximum offset into the code at which this entry can be placed.  While
15574 pushing fixes for forward references, all entries are sorted in order
15575 of increasing max_address. */
15576 HOST_WIDE_INT max_address;
15577 /* Similarly for an entry inserted for a backwards ref. */
15578 HOST_WIDE_INT min_address;
15579 /* The number of fixes referencing this entry. This can become zero
15580 if we "unpush" an entry. In this case we ignore the entry when we
15581 come to emit the code. */
15582 int refcount;
15583 /* The offset from the start of the minipool. */
15584 HOST_WIDE_INT offset;
15585   /* The value in the table.  */
15586 rtx value;
15587 /* The mode of value. */
15588 machine_mode mode;
15589 /* The size of the value. With iWMMXt enabled
15590      sizes > 4 also imply an alignment of 8 bytes.  */
15591 int fix_size;
15592 };
15593
15594 struct minipool_fixup
15595 {
15596 Mfix * next;
15597 rtx_insn * insn;
15598 HOST_WIDE_INT address;
15599 rtx * loc;
15600 machine_mode mode;
15601 int fix_size;
15602 rtx value;
15603 Mnode * minipool;
15604 HOST_WIDE_INT forwards;
15605 HOST_WIDE_INT backwards;
15606 };
15607
15608 /* Fixes less than a word need padding out to a word boundary. */
15609 #define MINIPOOL_FIX_SIZE(mode) \
15610 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
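/* For instance, MINIPOOL_FIX_SIZE (HImode) is 4 (sub-word fixes are padded
   up to a word), while MINIPOOL_FIX_SIZE (DImode) is 8.  */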
15611
15612 static Mnode * minipool_vector_head;
15613 static Mnode * minipool_vector_tail;
15614 static rtx_code_label *minipool_vector_label;
15615 static int minipool_pad;
15616
15617 /* The linked list of all minipool fixes required for this function. */
15618 Mfix * minipool_fix_head;
15619 Mfix * minipool_fix_tail;
15620 /* The fix entry for the current minipool, once it has been placed. */
15621 Mfix * minipool_barrier;
15622
15623 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15624 #define JUMP_TABLES_IN_TEXT_SECTION 0
15625 #endif
15626
15627 static HOST_WIDE_INT
15628 get_jump_table_size (rtx_jump_table_data *insn)
15629 {
15630   /* ADDR_VECs only take room if read-only data goes into the text
15631 section. */
15632 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15633 {
15634 rtx body = PATTERN (insn);
15635 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15636 HOST_WIDE_INT size;
15637 HOST_WIDE_INT modesize;
15638
15639 modesize = GET_MODE_SIZE (GET_MODE (body));
15640 size = modesize * XVECLEN (body, elt);
15641 switch (modesize)
15642 {
15643 case 1:
15644 /* Round up size of TBB table to a halfword boundary. */
15645 size = (size + 1) & ~HOST_WIDE_INT_1;
15646 break;
15647 case 2:
15648 /* No padding necessary for TBH. */
15649 break;
15650 case 4:
15651 /* Add two bytes for alignment on Thumb. */
15652 if (TARGET_THUMB)
15653 size += 2;
15654 break;
15655 default:
15656 gcc_unreachable ();
15657 }
15658 return size;
15659 }
15660
15661 return 0;
15662 }
15663
15664 /* Return the maximum amount of padding that will be inserted before
15665 label LABEL. */
15666
15667 static HOST_WIDE_INT
15668 get_label_padding (rtx label)
15669 {
15670 HOST_WIDE_INT align, min_insn_size;
15671
15672 align = 1 << label_to_alignment (label);
15673 min_insn_size = TARGET_THUMB ? 2 : 4;
15674 return align > min_insn_size ? align - min_insn_size : 0;
15675 }
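/* For example, if LABEL is aligned to 8 bytes (label_to_alignment == 3),
   the worst-case padding computed above is 8 - 2 = 6 bytes for Thumb and
   8 - 4 = 4 bytes for ARM.  */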
15676
15677 /* Move a minipool fix MP from its current location to before MAX_MP.
15678 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15679 constraints may need updating. */
15680 static Mnode *
15681 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15682 HOST_WIDE_INT max_address)
15683 {
15684 /* The code below assumes these are different. */
15685 gcc_assert (mp != max_mp);
15686
15687 if (max_mp == NULL)
15688 {
15689 if (max_address < mp->max_address)
15690 mp->max_address = max_address;
15691 }
15692 else
15693 {
15694 if (max_address > max_mp->max_address - mp->fix_size)
15695 mp->max_address = max_mp->max_address - mp->fix_size;
15696 else
15697 mp->max_address = max_address;
15698
15699 /* Unlink MP from its current position. Since max_mp is non-null,
15700 mp->prev must be non-null. */
15701 mp->prev->next = mp->next;
15702 if (mp->next != NULL)
15703 mp->next->prev = mp->prev;
15704 else
15705 minipool_vector_tail = mp->prev;
15706
15707 /* Re-insert it before MAX_MP. */
15708 mp->next = max_mp;
15709 mp->prev = max_mp->prev;
15710 max_mp->prev = mp;
15711
15712 if (mp->prev != NULL)
15713 mp->prev->next = mp;
15714 else
15715 minipool_vector_head = mp;
15716 }
15717
15718 /* Save the new entry. */
15719 max_mp = mp;
15720
15721 /* Scan over the preceding entries and adjust their addresses as
15722 required. */
15723 while (mp->prev != NULL
15724 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15725 {
15726 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15727 mp = mp->prev;
15728 }
15729
15730 return max_mp;
15731 }
15732
15733 /* Add a constant to the minipool for a forward reference. Returns the
15734 node added or NULL if the constant will not fit in this pool. */
15735 static Mnode *
15736 add_minipool_forward_ref (Mfix *fix)
15737 {
15738 /* If set, max_mp is the first pool_entry that has a lower
15739 constraint than the one we are trying to add. */
15740 Mnode * max_mp = NULL;
15741 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15742 Mnode * mp;
15743
15744 /* If the minipool starts before the end of FIX->INSN then this FIX
15745      cannot be placed into the current pool.  Furthermore, adding the
15746 new constant pool entry may cause the pool to start FIX_SIZE bytes
15747 earlier. */
15748 if (minipool_vector_head &&
15749 (fix->address + get_attr_length (fix->insn)
15750 >= minipool_vector_head->max_address - fix->fix_size))
15751 return NULL;
15752
15753 /* Scan the pool to see if a constant with the same value has
15754 already been added. While we are doing this, also note the
15755 location where we must insert the constant if it doesn't already
15756 exist. */
15757 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15758 {
15759 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15760 && fix->mode == mp->mode
15761 && (!LABEL_P (fix->value)
15762 || (CODE_LABEL_NUMBER (fix->value)
15763 == CODE_LABEL_NUMBER (mp->value)))
15764 && rtx_equal_p (fix->value, mp->value))
15765 {
15766 /* More than one fix references this entry. */
15767 mp->refcount++;
15768 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15769 }
15770
15771 /* Note the insertion point if necessary. */
15772 if (max_mp == NULL
15773 && mp->max_address > max_address)
15774 max_mp = mp;
15775
15776       /* If we are inserting an 8-byte aligned quantity and
15777 we have not already found an insertion point, then
15778 make sure that all such 8-byte aligned quantities are
15779 placed at the start of the pool. */
15780 if (ARM_DOUBLEWORD_ALIGN
15781 && max_mp == NULL
15782 && fix->fix_size >= 8
15783 && mp->fix_size < 8)
15784 {
15785 max_mp = mp;
15786 max_address = mp->max_address;
15787 }
15788 }
15789
15790 /* The value is not currently in the minipool, so we need to create
15791 a new entry for it. If MAX_MP is NULL, the entry will be put on
15792 the end of the list since the placement is less constrained than
15793 any existing entry. Otherwise, we insert the new fix before
15794 MAX_MP and, if necessary, adjust the constraints on the other
15795 entries. */
15796 mp = XNEW (Mnode);
15797 mp->fix_size = fix->fix_size;
15798 mp->mode = fix->mode;
15799 mp->value = fix->value;
15800 mp->refcount = 1;
15801 /* Not yet required for a backwards ref. */
15802 mp->min_address = -65536;
15803
15804 if (max_mp == NULL)
15805 {
15806 mp->max_address = max_address;
15807 mp->next = NULL;
15808 mp->prev = minipool_vector_tail;
15809
15810 if (mp->prev == NULL)
15811 {
15812 minipool_vector_head = mp;
15813 minipool_vector_label = gen_label_rtx ();
15814 }
15815 else
15816 mp->prev->next = mp;
15817
15818 minipool_vector_tail = mp;
15819 }
15820 else
15821 {
15822 if (max_address > max_mp->max_address - mp->fix_size)
15823 mp->max_address = max_mp->max_address - mp->fix_size;
15824 else
15825 mp->max_address = max_address;
15826
15827 mp->next = max_mp;
15828 mp->prev = max_mp->prev;
15829 max_mp->prev = mp;
15830 if (mp->prev != NULL)
15831 mp->prev->next = mp;
15832 else
15833 minipool_vector_head = mp;
15834 }
15835
15836 /* Save the new entry. */
15837 max_mp = mp;
15838
15839 /* Scan over the preceding entries and adjust their addresses as
15840 required. */
15841 while (mp->prev != NULL
15842 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15843 {
15844 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15845 mp = mp->prev;
15846 }
15847
15848 return max_mp;
15849 }
15850
15851 static Mnode *
15852 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15853 HOST_WIDE_INT min_address)
15854 {
15855 HOST_WIDE_INT offset;
15856
15857 /* The code below assumes these are different. */
15858 gcc_assert (mp != min_mp);
15859
15860 if (min_mp == NULL)
15861 {
15862 if (min_address > mp->min_address)
15863 mp->min_address = min_address;
15864 }
15865 else
15866 {
15867 /* We will adjust this below if it is too loose. */
15868 mp->min_address = min_address;
15869
15870 /* Unlink MP from its current position. Since min_mp is non-null,
15871 mp->next must be non-null. */
15872 mp->next->prev = mp->prev;
15873 if (mp->prev != NULL)
15874 mp->prev->next = mp->next;
15875 else
15876 minipool_vector_head = mp->next;
15877
15878 /* Reinsert it after MIN_MP. */
15879 mp->prev = min_mp;
15880 mp->next = min_mp->next;
15881 min_mp->next = mp;
15882 if (mp->next != NULL)
15883 mp->next->prev = mp;
15884 else
15885 minipool_vector_tail = mp;
15886 }
15887
15888 min_mp = mp;
15889
15890 offset = 0;
15891 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15892 {
15893 mp->offset = offset;
15894 if (mp->refcount > 0)
15895 offset += mp->fix_size;
15896
15897 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15898 mp->next->min_address = mp->min_address + mp->fix_size;
15899 }
15900
15901 return min_mp;
15902 }
15903
15904 /* Add a constant to the minipool for a backward reference. Returns the
15905 node added or NULL if the constant will not fit in this pool.
15906
15907 Note that the code for insertion for a backwards reference can be
15908 somewhat confusing because the calculated offsets for each fix do
15909 not take into account the size of the pool (which is still under
15910    construction).  */
15911 static Mnode *
15912 add_minipool_backward_ref (Mfix *fix)
15913 {
15914 /* If set, min_mp is the last pool_entry that has a lower constraint
15915 than the one we are trying to add. */
15916 Mnode *min_mp = NULL;
15917 /* This can be negative, since it is only a constraint. */
15918 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15919 Mnode *mp;
15920
15921 /* If we can't reach the current pool from this insn, or if we can't
15922 insert this entry at the end of the pool without pushing other
15923 fixes out of range, then we don't try. This ensures that we
15924 can't fail later on. */
15925 if (min_address >= minipool_barrier->address
15926 || (minipool_vector_tail->min_address + fix->fix_size
15927 >= minipool_barrier->address))
15928 return NULL;
15929
15930 /* Scan the pool to see if a constant with the same value has
15931 already been added. While we are doing this, also note the
15932 location where we must insert the constant if it doesn't already
15933 exist. */
15934 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15935 {
15936 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15937 && fix->mode == mp->mode
15938 && (!LABEL_P (fix->value)
15939 || (CODE_LABEL_NUMBER (fix->value)
15940 == CODE_LABEL_NUMBER (mp->value)))
15941 && rtx_equal_p (fix->value, mp->value)
15942 /* Check that there is enough slack to move this entry to the
15943 end of the table (this is conservative). */
15944 && (mp->max_address
15945 > (minipool_barrier->address
15946 + minipool_vector_tail->offset
15947 + minipool_vector_tail->fix_size)))
15948 {
15949 mp->refcount++;
15950 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15951 }
15952
15953 if (min_mp != NULL)
15954 mp->min_address += fix->fix_size;
15955 else
15956 {
15957 /* Note the insertion point if necessary. */
15958 if (mp->min_address < min_address)
15959 {
15960 	      /* For now, we do not allow the insertion of nodes requiring
15961 		 8-byte alignment anywhere but at the start of the pool.  */
15962 if (ARM_DOUBLEWORD_ALIGN
15963 && fix->fix_size >= 8 && mp->fix_size < 8)
15964 return NULL;
15965 else
15966 min_mp = mp;
15967 }
15968 else if (mp->max_address
15969 < minipool_barrier->address + mp->offset + fix->fix_size)
15970 {
15971 /* Inserting before this entry would push the fix beyond
15972 its maximum address (which can happen if we have
15973 re-located a forwards fix); force the new fix to come
15974 after it. */
15975 if (ARM_DOUBLEWORD_ALIGN
15976 && fix->fix_size >= 8 && mp->fix_size < 8)
15977 return NULL;
15978 else
15979 {
15980 min_mp = mp;
15981 min_address = mp->min_address + fix->fix_size;
15982 }
15983 }
15984 /* Do not insert a non-8-byte aligned quantity before 8-byte
15985 aligned quantities. */
15986 else if (ARM_DOUBLEWORD_ALIGN
15987 && fix->fix_size < 8
15988 && mp->fix_size >= 8)
15989 {
15990 min_mp = mp;
15991 min_address = mp->min_address + fix->fix_size;
15992 }
15993 }
15994 }
15995
15996 /* We need to create a new entry. */
15997 mp = XNEW (Mnode);
15998 mp->fix_size = fix->fix_size;
15999 mp->mode = fix->mode;
16000 mp->value = fix->value;
16001 mp->refcount = 1;
16002 mp->max_address = minipool_barrier->address + 65536;
16003
16004 mp->min_address = min_address;
16005
16006 if (min_mp == NULL)
16007 {
16008 mp->prev = NULL;
16009 mp->next = minipool_vector_head;
16010
16011 if (mp->next == NULL)
16012 {
16013 minipool_vector_tail = mp;
16014 minipool_vector_label = gen_label_rtx ();
16015 }
16016 else
16017 mp->next->prev = mp;
16018
16019 minipool_vector_head = mp;
16020 }
16021 else
16022 {
16023 mp->next = min_mp->next;
16024 mp->prev = min_mp;
16025 min_mp->next = mp;
16026
16027 if (mp->next != NULL)
16028 mp->next->prev = mp;
16029 else
16030 minipool_vector_tail = mp;
16031 }
16032
16033 /* Save the new entry. */
16034 min_mp = mp;
16035
16036 if (mp->prev)
16037 mp = mp->prev;
16038 else
16039 mp->offset = 0;
16040
16041 /* Scan over the following entries and adjust their offsets. */
16042 while (mp->next != NULL)
16043 {
16044 if (mp->next->min_address < mp->min_address + mp->fix_size)
16045 mp->next->min_address = mp->min_address + mp->fix_size;
16046
16047 if (mp->refcount)
16048 mp->next->offset = mp->offset + mp->fix_size;
16049 else
16050 mp->next->offset = mp->offset;
16051
16052 mp = mp->next;
16053 }
16054
16055 return min_mp;
16056 }
16057
16058 static void
16059 assign_minipool_offsets (Mfix *barrier)
16060 {
16061 HOST_WIDE_INT offset = 0;
16062 Mnode *mp;
16063
16064 minipool_barrier = barrier;
16065
16066 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16067 {
16068 mp->offset = offset;
16069
16070 if (mp->refcount > 0)
16071 offset += mp->fix_size;
16072 }
16073 }
16074
16075 /* Output the literal table.  */
16076 static void
16077 dump_minipool (rtx_insn *scan)
16078 {
16079 Mnode * mp;
16080 Mnode * nmp;
16081 int align64 = 0;
16082
16083 if (ARM_DOUBLEWORD_ALIGN)
16084 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16085 if (mp->refcount > 0 && mp->fix_size >= 8)
16086 {
16087 align64 = 1;
16088 break;
16089 }
16090
16091 if (dump_file)
16092 fprintf (dump_file,
16093 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16094 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16095
16096 scan = emit_label_after (gen_label_rtx (), scan);
16097 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16098 scan = emit_label_after (minipool_vector_label, scan);
16099
16100 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16101 {
16102 if (mp->refcount > 0)
16103 {
16104 if (dump_file)
16105 {
16106 fprintf (dump_file,
16107 ";; Offset %u, min %ld, max %ld ",
16108 (unsigned) mp->offset, (unsigned long) mp->min_address,
16109 (unsigned long) mp->max_address);
16110 arm_print_value (dump_file, mp->value);
16111 fputc ('\n', dump_file);
16112 }
16113
16114 switch (GET_MODE_SIZE (mp->mode))
16115 {
16116 #ifdef HAVE_consttable_1
16117 case 1:
16118 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16119 break;
16120
16121 #endif
16122 #ifdef HAVE_consttable_2
16123 case 2:
16124 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16125 break;
16126
16127 #endif
16128 #ifdef HAVE_consttable_4
16129 case 4:
16130 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16131 break;
16132
16133 #endif
16134 #ifdef HAVE_consttable_8
16135 case 8:
16136 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16137 break;
16138
16139 #endif
16140 #ifdef HAVE_consttable_16
16141 case 16:
16142 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16143 break;
16144
16145 #endif
16146 default:
16147 gcc_unreachable ();
16148 }
16149 }
16150
16151 nmp = mp->next;
16152 free (mp);
16153 }
16154
16155 minipool_vector_head = minipool_vector_tail = NULL;
16156 scan = emit_insn_after (gen_consttable_end (), scan);
16157 scan = emit_barrier_after (scan);
16158 }
16159
16160 /* Return the cost of forcibly inserting a barrier after INSN. */
16161 static int
16162 arm_barrier_cost (rtx_insn *insn)
16163 {
16164 /* Basing the location of the pool on the loop depth is preferable,
16165 but at the moment, the basic block information seems to be
16166      corrupted by this stage of the compilation. */
16167 int base_cost = 50;
16168 rtx_insn *next = next_nonnote_insn (insn);
16169
16170 if (next != NULL && LABEL_P (next))
16171 base_cost -= 20;
16172
16173 switch (GET_CODE (insn))
16174 {
16175 case CODE_LABEL:
16176 /* It will always be better to place the table before the label, rather
16177 than after it. */
16178 return 50;
16179
16180 case INSN:
16181 case CALL_INSN:
16182 return base_cost;
16183
16184 case JUMP_INSN:
16185 return base_cost - 10;
16186
16187 default:
16188 return base_cost + 10;
16189 }
16190 }
16191
16192 /* Find the best place in the insn stream in the range
16193 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16194 Create the barrier by inserting a jump and add a new fix entry for
16195 it. */
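/* An illustrative sketch (not from the original sources): the barrier forced
   below ends up looking roughly like

	b	.LCBnn		@ branch around the pool site
	@ <barrier; dump_minipool later emits the literal pool here>
   .LCBnn:

   so that execution never falls into the pool data.  */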
16196 static Mfix *
16197 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16198 {
16199 HOST_WIDE_INT count = 0;
16200 rtx_barrier *barrier;
16201 rtx_insn *from = fix->insn;
16202 /* The instruction after which we will insert the jump. */
16203 rtx_insn *selected = NULL;
16204 int selected_cost;
16205 /* The address at which the jump instruction will be placed. */
16206 HOST_WIDE_INT selected_address;
16207 Mfix * new_fix;
16208 HOST_WIDE_INT max_count = max_address - fix->address;
16209 rtx_code_label *label = gen_label_rtx ();
16210
16211 selected_cost = arm_barrier_cost (from);
16212 selected_address = fix->address;
16213
16214 while (from && count < max_count)
16215 {
16216 rtx_jump_table_data *tmp;
16217 int new_cost;
16218
16219 /* This code shouldn't have been called if there was a natural barrier
16220 within range. */
16221 gcc_assert (!BARRIER_P (from));
16222
16223 /* Count the length of this insn. This must stay in sync with the
16224 code that pushes minipool fixes. */
16225 if (LABEL_P (from))
16226 count += get_label_padding (from);
16227 else
16228 count += get_attr_length (from);
16229
16230 /* If there is a jump table, add its length. */
16231 if (tablejump_p (from, NULL, &tmp))
16232 {
16233 count += get_jump_table_size (tmp);
16234
16235 /* Jump tables aren't in a basic block, so base the cost on
16236 the dispatch insn. If we select this location, we will
16237 still put the pool after the table. */
16238 new_cost = arm_barrier_cost (from);
16239
16240 if (count < max_count
16241 && (!selected || new_cost <= selected_cost))
16242 {
16243 selected = tmp;
16244 selected_cost = new_cost;
16245 selected_address = fix->address + count;
16246 }
16247
16248 /* Continue after the dispatch table. */
16249 from = NEXT_INSN (tmp);
16250 continue;
16251 }
16252
16253 new_cost = arm_barrier_cost (from);
16254
16255 if (count < max_count
16256 && (!selected || new_cost <= selected_cost))
16257 {
16258 selected = from;
16259 selected_cost = new_cost;
16260 selected_address = fix->address + count;
16261 }
16262
16263 from = NEXT_INSN (from);
16264 }
16265
16266 /* Make sure that we found a place to insert the jump. */
16267 gcc_assert (selected);
16268
16269 /* Make sure we do not split a call and its corresponding
16270 CALL_ARG_LOCATION note. */
16271 if (CALL_P (selected))
16272 {
16273 rtx_insn *next = NEXT_INSN (selected);
16274 if (next && NOTE_P (next)
16275 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16276 selected = next;
16277 }
16278
16279 /* Create a new JUMP_INSN that branches around a barrier. */
16280 from = emit_jump_insn_after (gen_jump (label), selected);
16281 JUMP_LABEL (from) = label;
16282 barrier = emit_barrier_after (from);
16283 emit_label_after (label, barrier);
16284
16285 /* Create a minipool barrier entry for the new barrier. */
16286 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16287 new_fix->insn = barrier;
16288 new_fix->address = selected_address;
16289 new_fix->next = fix->next;
16290 fix->next = new_fix;
16291
16292 return new_fix;
16293 }
16294
16295 /* Record that there is a natural barrier in the insn stream at
16296 ADDRESS. */
16297 static void
16298 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16299 {
16300 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16301
16302 fix->insn = insn;
16303 fix->address = address;
16304
16305 fix->next = NULL;
16306 if (minipool_fix_head != NULL)
16307 minipool_fix_tail->next = fix;
16308 else
16309 minipool_fix_head = fix;
16310
16311 minipool_fix_tail = fix;
16312 }
16313
16314 /* Record INSN, which will need fixing up to load a value from the
16315 minipool. ADDRESS is the offset of the insn since the start of the
16316 function; LOC is a pointer to the part of the insn which requires
16317 fixing; VALUE is the constant that must be loaded, which is of type
16318 MODE. */
16319 static void
16320 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16321 machine_mode mode, rtx value)
16322 {
16323 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16324
16325 fix->insn = insn;
16326 fix->address = address;
16327 fix->loc = loc;
16328 fix->mode = mode;
16329 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16330 fix->value = value;
16331 fix->forwards = get_attr_pool_range (insn);
16332 fix->backwards = get_attr_neg_pool_range (insn);
16333 fix->minipool = NULL;
16334
16335 /* If an insn doesn't have a range defined for it, then it isn't
16336 expecting to be reworked by this code. Better to stop now than
16337 to generate duff assembly code. */
16338 gcc_assert (fix->forwards || fix->backwards);
16339
16340 /* If an entry requires 8-byte alignment then assume all constant pools
16341 require 4 bytes of padding. Trying to do this later on a per-pool
16342 basis is awkward because existing pool entries have to be modified. */
16343 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16344 minipool_pad = 4;
16345
16346 if (dump_file)
16347 {
16348 fprintf (dump_file,
16349 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16350 GET_MODE_NAME (mode),
16351 INSN_UID (insn), (unsigned long) address,
16352 -1 * (long)fix->backwards, (long)fix->forwards);
16353 arm_print_value (dump_file, fix->value);
16354 fprintf (dump_file, "\n");
16355 }
16356
16357 /* Add it to the chain of fixes. */
16358 fix->next = NULL;
16359
16360 if (minipool_fix_head != NULL)
16361 minipool_fix_tail->next = fix;
16362 else
16363 minipool_fix_head = fix;
16364
16365 minipool_fix_tail = fix;
16366 }
16367
16368 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16369    constant inline; 99 means we always want the value synthesized rather
16370    than loaded from a literal pool.  */
16371 int
16372 arm_max_const_double_inline_cost ()
16373 {
16374 /* Let the value get synthesized to avoid the use of literal pools. */
16375 if (arm_disable_literal_pool)
16376 return 99;
16377
16378 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16379 }
16380
16381 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16382 Returns the number of insns needed, or 99 if we don't know how to
16383 do it. */
16384 int
16385 arm_const_double_inline_cost (rtx val)
16386 {
16387 rtx lowpart, highpart;
16388 machine_mode mode;
16389
16390 mode = GET_MODE (val);
16391
16392 if (mode == VOIDmode)
16393 mode = DImode;
16394
16395 gcc_assert (GET_MODE_SIZE (mode) == 8);
16396
16397 lowpart = gen_lowpart (SImode, val);
16398 highpart = gen_highpart_mode (SImode, mode, val);
16399
16400 gcc_assert (CONST_INT_P (lowpart));
16401 gcc_assert (CONST_INT_P (highpart));
16402
16403 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16404 NULL_RTX, NULL_RTX, 0, 0)
16405 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16406 NULL_RTX, NULL_RTX, 0, 0));
16407 }
16408
16409 /* Cost of loading a SImode constant. */
16410 static inline int
16411 arm_const_inline_cost (enum rtx_code code, rtx val)
16412 {
16413 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16414 NULL_RTX, NULL_RTX, 1, 0);
16415 }
16416
16417 /* Return true if it is worthwhile to split a 64-bit constant into two
16418 32-bit operations. This is the case if optimizing for size, or
16419 if we have load delay slots, or if one 32-bit part can be done with
16420 a single data operation. */
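/* For instance (an illustrative case, not from the original sources), the
   64-bit constant 0x1200000000000003 is worth splitting: its high word,
   0x12000000, is an 8-bit value rotated by an even amount and is therefore
   a valid ARM data-processing immediate.  */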
16421 bool
16422 arm_const_double_by_parts (rtx val)
16423 {
16424 machine_mode mode = GET_MODE (val);
16425 rtx part;
16426
16427 if (optimize_size || arm_ld_sched)
16428 return true;
16429
16430 if (mode == VOIDmode)
16431 mode = DImode;
16432
16433 part = gen_highpart_mode (SImode, mode, val);
16434
16435 gcc_assert (CONST_INT_P (part));
16436
16437 if (const_ok_for_arm (INTVAL (part))
16438 || const_ok_for_arm (~INTVAL (part)))
16439 return true;
16440
16441 part = gen_lowpart (SImode, val);
16442
16443 gcc_assert (CONST_INT_P (part));
16444
16445 if (const_ok_for_arm (INTVAL (part))
16446 || const_ok_for_arm (~INTVAL (part)))
16447 return true;
16448
16449 return false;
16450 }
16451
16452 /* Return true if it is possible to inline both the high and low parts
16453 of a 64-bit constant into 32-bit data processing instructions. */
16454 bool
16455 arm_const_double_by_immediates (rtx val)
16456 {
16457 machine_mode mode = GET_MODE (val);
16458 rtx part;
16459
16460 if (mode == VOIDmode)
16461 mode = DImode;
16462
16463 part = gen_highpart_mode (SImode, mode, val);
16464
16465 gcc_assert (CONST_INT_P (part));
16466
16467 if (!const_ok_for_arm (INTVAL (part)))
16468 return false;
16469
16470 part = gen_lowpart (SImode, val);
16471
16472 gcc_assert (CONST_INT_P (part));
16473
16474 if (!const_ok_for_arm (INTVAL (part)))
16475 return false;
16476
16477 return true;
16478 }
16479
16480 /* Scan INSN and note any of its operands that need fixing.
16481 If DO_PUSHES is false we do not actually push any of the fixups
16482 needed. */
16483 static void
16484 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16485 {
16486 int opno;
16487
16488 extract_constrain_insn (insn);
16489
16490 if (recog_data.n_alternatives == 0)
16491 return;
16492
16493 /* Fill in recog_op_alt with information about the constraints of
16494 this insn. */
16495 preprocess_constraints (insn);
16496
16497 const operand_alternative *op_alt = which_op_alt ();
16498 for (opno = 0; opno < recog_data.n_operands; opno++)
16499 {
16500 /* Things we need to fix can only occur in inputs. */
16501 if (recog_data.operand_type[opno] != OP_IN)
16502 continue;
16503
16504 /* If this alternative is a memory reference, then any mention
16505 of constants in this alternative is really to fool reload
16506 into allowing us to accept one there. We need to fix them up
16507 now so that we output the right code. */
16508 if (op_alt[opno].memory_ok)
16509 {
16510 rtx op = recog_data.operand[opno];
16511
16512 if (CONSTANT_P (op))
16513 {
16514 if (do_pushes)
16515 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16516 recog_data.operand_mode[opno], op);
16517 }
16518 else if (MEM_P (op)
16519 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16520 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16521 {
16522 if (do_pushes)
16523 {
16524 rtx cop = avoid_constant_pool_reference (op);
16525
16526 /* Casting the address of something to a mode narrower
16527 than a word can cause avoid_constant_pool_reference()
16528 to return the pool reference itself. That's no good to
16529 		 us here.  Let's just hope that we can use the
16530 constant pool value directly. */
16531 if (op == cop)
16532 cop = get_pool_constant (XEXP (op, 0));
16533
16534 push_minipool_fix (insn, address,
16535 recog_data.operand_loc[opno],
16536 recog_data.operand_mode[opno], cop);
16537 }
16538
16539 }
16540 }
16541 }
16542
16543 return;
16544 }
16545
16546 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16547 and unions in the context of ARMv8-M Security Extensions. It is used as a
16548 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16549    functions.  The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16550 or four masks, depending on whether it is being computed for a
16551 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16552 respectively. The tree for the type of the argument or a field within an
16553 argument is passed in ARG_TYPE, the current register this argument or field
16554 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16555 argument or field starts at is passed in STARTING_BIT and the last used bit
16556 is kept in LAST_USED_BIT which is also updated accordingly. */
16557
16558 static unsigned HOST_WIDE_INT
16559 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16560 uint32_t * padding_bits_to_clear,
16561 unsigned starting_bit, int * last_used_bit)
16562
16563 {
16564 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16565
16566 if (TREE_CODE (arg_type) == RECORD_TYPE)
16567 {
16568 unsigned current_bit = starting_bit;
16569 tree field;
16570 long int offset, size;
16571
16572
16573 field = TYPE_FIELDS (arg_type);
16574 while (field)
16575 {
16576 	  /* The offset within a structure is always an offset from
16577 	     the start of that structure.  Make sure we take that into
16578 	     account in the calculation of the register-based offset we use here.  */
16579 offset = starting_bit;
16580 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16581 offset %= 32;
16582
16583 /* This is the actual size of the field, for bitfields this is the
16584 bitfield width and not the container size. */
16585 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16586
16587 if (*last_used_bit != offset)
16588 {
16589 if (offset < *last_used_bit)
16590 {
16591 		  /* This field's offset is before the 'last_used_bit', which
16592 		     means this field goes in the next register.  So we need to
16593 pad the rest of the current register and increase the
16594 register number. */
16595 uint32_t mask;
16596 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16597 mask++;
16598
16599 padding_bits_to_clear[*regno] |= mask;
16600 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16601 (*regno)++;
16602 }
16603 else
16604 {
16605 /* Otherwise we pad the bits between the last field's end and
16606 the start of the new field. */
16607 uint32_t mask;
16608
16609 mask = ((uint32_t)-1) >> (32 - offset);
16610 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16611 padding_bits_to_clear[*regno] |= mask;
16612 }
16613 current_bit = offset;
16614 }
16615
16616 /* Calculate further padding bits for inner structs/unions too. */
16617 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16618 {
16619 *last_used_bit = current_bit;
16620 not_to_clear_reg_mask
16621 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16622 padding_bits_to_clear, offset,
16623 last_used_bit);
16624 }
16625 else
16626 {
16627 /* Update 'current_bit' with this field's size. If the
16628 'current_bit' lies in a subsequent register, update 'regno' and
16629 reset 'current_bit' to point to the current bit in that new
16630 register. */
16631 current_bit += size;
16632 while (current_bit >= 32)
16633 {
16634 current_bit-=32;
16635 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16636 (*regno)++;
16637 }
16638 *last_used_bit = current_bit;
16639 }
16640
16641 field = TREE_CHAIN (field);
16642 }
16643 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16644 }
16645 else if (TREE_CODE (arg_type) == UNION_TYPE)
16646 {
16647 tree field, field_t;
16648 int i, regno_t, field_size;
16649 int max_reg = -1;
16650 int max_bit = -1;
16651 uint32_t mask;
16652 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16653 = {-1, -1, -1, -1};
16654
16655 /* To compute the padding bits in a union we only consider bits as
16656 padding bits if they are always either a padding bit or fall outside a
16657      field's size for all fields in the union.  */
16658 field = TYPE_FIELDS (arg_type);
16659 while (field)
16660 {
16661 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16662 = {0U, 0U, 0U, 0U};
16663 int last_used_bit_t = *last_used_bit;
16664 regno_t = *regno;
16665 field_t = TREE_TYPE (field);
16666
16667 /* If the field's type is either a record or a union make sure to
16668 compute their padding bits too. */
16669 if (RECORD_OR_UNION_TYPE_P (field_t))
16670 not_to_clear_reg_mask
16671 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16672 &padding_bits_to_clear_t[0],
16673 starting_bit, &last_used_bit_t);
16674 else
16675 {
16676 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16677 regno_t = (field_size / 32) + *regno;
16678 last_used_bit_t = (starting_bit + field_size) % 32;
16679 }
16680
16681 for (i = *regno; i < regno_t; i++)
16682 {
16683 /* For all but the last register used by this field only keep the
16684 padding bits that were padding bits in this field. */
16685 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16686 }
16687
16688 /* For the last register, keep all padding bits that were padding
16689 bits in this field and any padding bits that are still valid
16690 as padding bits but fall outside of this field's size. */
16691 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16692 padding_bits_to_clear_res[regno_t]
16693 &= padding_bits_to_clear_t[regno_t] | mask;
16694
16695 /* Update the maximum size of the fields in terms of registers used
16696 ('max_reg') and the 'last_used_bit' in said register. */
16697 if (max_reg < regno_t)
16698 {
16699 max_reg = regno_t;
16700 max_bit = last_used_bit_t;
16701 }
16702 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16703 max_bit = last_used_bit_t;
16704
16705 field = TREE_CHAIN (field);
16706 }
16707
16708 /* Update the current padding_bits_to_clear using the intersection of the
16709 padding bits of all the fields. */
16710 for (i=*regno; i < max_reg; i++)
16711 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16712
16713       /* Do not keep trailing padding bits; we do not know yet whether this
16714 is the end of the argument. */
16715 mask = ((uint32_t) 1 << max_bit) - 1;
16716 padding_bits_to_clear[max_reg]
16717 |= padding_bits_to_clear_res[max_reg] & mask;
16718
16719 *regno = max_reg;
16720 *last_used_bit = max_bit;
16721 }
16722 else
16723 /* This function should only be used for structs and unions. */
16724 gcc_unreachable ();
16725
16726 return not_to_clear_reg_mask;
16727 }
16728
16729 /* In the context of ARMv8-M Security Extensions, this function is used for both
16730 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16731 registers are used when returning or passing arguments, which is then
16732 returned as a mask. It will also compute a mask to indicate padding/unused
16733 bits for each of these registers, and passes this through the
16734 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16735 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16736 the starting register used to pass this argument or return value is passed
16737 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16738 for struct and union types. */
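
/* An illustrative sketch (not from the original sources): for an argument of
   type struct { char c; short s; } starting in r0, the char occupies bits
   0-7 and bits 8-15 are padding before the short in bits 16-31, so on return
   padding_bits_to_clear[0] is 0x0000ff00 and the returned mask marks only r0
   as used.  */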
16739
16740 static unsigned HOST_WIDE_INT
16741 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16742 uint32_t * padding_bits_to_clear)
16743
16744 {
16745 int last_used_bit = 0;
16746 unsigned HOST_WIDE_INT not_to_clear_mask;
16747
16748 if (RECORD_OR_UNION_TYPE_P (arg_type))
16749 {
16750 not_to_clear_mask
16751 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16752 padding_bits_to_clear, 0,
16753 &last_used_bit);
16754
16755
16756 /* If the 'last_used_bit' is not zero, that means we are still using a
16757 part of the last 'regno'. In such cases we must clear the trailing
16758 	 bits.  Otherwise we are not using regno and we should mark it as to be
16759 	 cleared.  */
16760 if (last_used_bit != 0)
16761 padding_bits_to_clear[regno]
16762 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16763 else
16764 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16765 }
16766 else
16767 {
16768 not_to_clear_mask = 0;
16769       /* We are not dealing with structs or unions, so these arguments may also
16770 	 be passed in floating-point registers.  In some cases a BLKmode is
16771 used when returning or passing arguments in multiple VFP registers. */
16772 if (GET_MODE (arg_rtx) == BLKmode)
16773 {
16774 int i, arg_regs;
16775 rtx reg;
16776
16777 /* This should really only occur when dealing with the hard-float
16778 ABI. */
16779 gcc_assert (TARGET_HARD_FLOAT_ABI);
16780
16781 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16782 {
16783 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16784 gcc_assert (REG_P (reg));
16785
16786 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16787
16788 /* If we are dealing with DF mode, make sure we don't
16789 clear either of the registers it addresses. */
16790 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16791 if (arg_regs > 1)
16792 {
16793 unsigned HOST_WIDE_INT mask;
16794 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16795 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16796 not_to_clear_mask |= mask;
16797 }
16798 }
16799 }
16800 else
16801 {
16802 /* Otherwise we can rely on the MODE to determine how many registers
16803 are being used by this argument. */
16804 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16805 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16806 if (arg_regs > 1)
16807 {
16808 unsigned HOST_WIDE_INT
16809 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16810 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16811 not_to_clear_mask |= mask;
16812 }
16813 }
16814 }
16815
16816 return not_to_clear_mask;
16817 }
16818
16819 /* Saves callee-saved registers, clears the callee-saved and caller-saved
16820    registers not used to pass arguments before a cmse_nonsecure_call, and
16821    restores the callee-saved registers afterwards.  */
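
/* An illustrative sketch (not from the original sources): for a
   cmse_nonsecure_call taking a single int argument, r0 (the argument) and
   the register holding the call address are left alone, while the other
   registers in r0-r3 (and, with -mfloat-abi=hard, the caller-saved VFP
   registers d0-d7) that are not used for arguments are overwritten before
   the branch: core registers with the cleansed call address, VFP registers
   with zero.  */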
16822
16823 static void
16824 cmse_nonsecure_call_clear_caller_saved (void)
16825 {
16826 basic_block bb;
16827
16828 FOR_EACH_BB_FN (bb, cfun)
16829 {
16830 rtx_insn *insn;
16831
16832 FOR_BB_INSNS (bb, insn)
16833 {
16834 uint64_t to_clear_mask, float_mask;
16835 rtx_insn *seq;
16836 rtx pat, call, unspec, reg, cleared_reg, tmp;
16837 unsigned int regno, maxregno;
16838 rtx address;
16839 CUMULATIVE_ARGS args_so_far_v;
16840 cumulative_args_t args_so_far;
16841 tree arg_type, fntype;
16842 bool using_r4, first_param = true;
16843 function_args_iterator args_iter;
16844 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16845 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16846
16847 if (!NONDEBUG_INSN_P (insn))
16848 continue;
16849
16850 if (!CALL_P (insn))
16851 continue;
16852
16853 pat = PATTERN (insn);
16854 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16855 call = XVECEXP (pat, 0, 0);
16856
16857 /* Get the real call RTX if the insn sets a value, ie. returns. */
16858 if (GET_CODE (call) == SET)
16859 call = SET_SRC (call);
16860
16861 /* Check if it is a cmse_nonsecure_call. */
16862 unspec = XEXP (call, 0);
16863 if (GET_CODE (unspec) != UNSPEC
16864 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16865 continue;
16866
16867 /* Determine the caller-saved registers we need to clear. */
16868 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16869 maxregno = NUM_ARG_REGS - 1;
16870 	  /* Only look at the caller-saved floating-point registers in case of
16871 	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
16872 	     lazy stores and loads, which clear both caller- and callee-saved
16873 registers. */
16874 if (TARGET_HARD_FLOAT_ABI)
16875 {
16876 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16877 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16878 to_clear_mask |= float_mask;
16879 maxregno = D7_VFP_REGNUM;
16880 }
16881
16882 /* Make sure the register used to hold the function address is not
16883 cleared. */
16884 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16885 gcc_assert (MEM_P (address));
16886 gcc_assert (REG_P (XEXP (address, 0)));
16887 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16888
16889 /* Set basic block of call insn so that df rescan is performed on
16890 insns inserted here. */
16891 set_block_for_insn (insn, bb);
16892 df_set_flags (DF_DEFER_INSN_RESCAN);
16893 start_sequence ();
16894
16895 /* Make sure the scheduler doesn't schedule other insns beyond
16896 here. */
16897 emit_insn (gen_blockage ());
16898
16899 	  /* Walk through all arguments and clear registers appropriately.  */
16901 fntype = TREE_TYPE (MEM_EXPR (address));
16902 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16903 NULL_TREE);
16904 args_so_far = pack_cumulative_args (&args_so_far_v);
16905 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16906 {
16907 rtx arg_rtx;
16908 machine_mode arg_mode = TYPE_MODE (arg_type);
16909
16910 if (VOID_TYPE_P (arg_type))
16911 continue;
16912
16913 if (!first_param)
16914 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16915 true);
16916
16917 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16918 true);
16919 gcc_assert (REG_P (arg_rtx));
16920 to_clear_mask
16921 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16922 REGNO (arg_rtx),
16923 padding_bits_to_clear_ptr);
16924
16925 first_param = false;
16926 }
16927
16928 /* Clear padding bits where needed. */
16929 cleared_reg = XEXP (address, 0);
16930 reg = gen_rtx_REG (SImode, IP_REGNUM);
16931 using_r4 = false;
16932 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
16933 {
16934 if (padding_bits_to_clear[regno] == 0)
16935 continue;
16936
16937 	      /* If this is a Thumb-1 target, copy the address of the function
16938 		 we are calling from 'r4' into 'ip' so that we can use r4 to
16939 		 clear the unused bits in the arguments.  */
16940 if (TARGET_THUMB1 && !using_r4)
16941 {
16942 using_r4 = true;
16943 reg = cleared_reg;
16944 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
16945 reg);
16946 }
16947
16948 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
16949 emit_move_insn (reg, tmp);
16950 /* Also fill the top half of the negated
16951 padding_bits_to_clear. */
16952 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
16953 {
16954 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
16955 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
16956 GEN_INT (16),
16957 GEN_INT (16)),
16958 tmp));
16959 }
16960
16961 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
16962 gen_rtx_REG (SImode, regno),
16963 reg));
16964
16965 }
16966 if (using_r4)
16967 emit_move_insn (cleared_reg,
16968 gen_rtx_REG (SImode, IP_REGNUM));
16969
16970 /* We use right shift and left shift to clear the LSB of the address
16971 we jump to instead of using bic, to avoid having to use an extra
16972 register on Thumb-1. */
16973 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
16974 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16975 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
16976 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16977
16978 	  /* Clear all registers that might leak before doing a non-secure
16979 	     call.  */
16980 for (regno = R0_REGNUM; regno <= maxregno; regno++)
16981 {
16982 if (!(to_clear_mask & (1LL << regno)))
16983 continue;
16984
16985 /* If regno is an even vfp register and its successor is also to
16986 be cleared, use vmov. */
16987 if (IS_VFP_REGNUM (regno))
16988 {
16989 if (TARGET_VFP_DOUBLE
16990 && VFP_REGNO_OK_FOR_DOUBLE (regno)
16991 && to_clear_mask & (1LL << (regno + 1)))
16992 emit_move_insn (gen_rtx_REG (DFmode, regno++),
16993 CONST0_RTX (DFmode));
16994 else
16995 emit_move_insn (gen_rtx_REG (SFmode, regno),
16996 CONST0_RTX (SFmode));
16997 }
16998 else
16999 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17000 }
17001
17002 seq = get_insns ();
17003 end_sequence ();
17004 emit_insn_before (seq, insn);
17005
17006 }
17007 }
17008 }
17009
17010 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17011    be useful in the next conditional jump insn.  */
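
/* An illustrative sketch (not from the original sources): given a Thumb-1
   sequence such as

	mov	r1, r2		@ move between low registers
	...			@ nothing here writes r1 or the condition codes
	cmp	r1, #0
	bne	.L1

   the move is rewritten as "subs r1, r2, #0"; since that sets the condition
   codes itself, the compare against zero can be dropped when the branch is
   output.  */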
17012
17013 static void
17014 thumb1_reorg (void)
17015 {
17016 basic_block bb;
17017
17018 FOR_EACH_BB_FN (bb, cfun)
17019 {
17020 rtx dest, src;
17021 rtx cmp, op0, op1, set = NULL;
17022 rtx_insn *prev, *insn = BB_END (bb);
17023 bool insn_clobbered = false;
17024
17025 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17026 insn = PREV_INSN (insn);
17027
17028 /* Find the last cbranchsi4_insn in basic block BB. */
17029 if (insn == BB_HEAD (bb)
17030 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17031 continue;
17032
17033 /* Get the register with which we are comparing. */
17034 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17035 op0 = XEXP (cmp, 0);
17036 op1 = XEXP (cmp, 1);
17037
17038 /* Check that comparison is against ZERO. */
17039 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17040 continue;
17041
17042 /* Find the first flag setting insn before INSN in basic block BB. */
17043 gcc_assert (insn != BB_HEAD (bb));
17044 for (prev = PREV_INSN (insn);
17045 (!insn_clobbered
17046 && prev != BB_HEAD (bb)
17047 && (NOTE_P (prev)
17048 || DEBUG_INSN_P (prev)
17049 || ((set = single_set (prev)) != NULL
17050 && get_attr_conds (prev) == CONDS_NOCOND)));
17051 prev = PREV_INSN (prev))
17052 {
17053 if (reg_set_p (op0, prev))
17054 insn_clobbered = true;
17055 }
17056
17057       /* Skip if op0 is clobbered by an insn other than prev.  */
17058 if (insn_clobbered)
17059 continue;
17060
17061 if (!set)
17062 continue;
17063
17064 dest = SET_DEST (set);
17065 src = SET_SRC (set);
17066 if (!low_register_operand (dest, SImode)
17067 || !low_register_operand (src, SImode))
17068 continue;
17069
17070 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17071 in INSN. Both src and dest of the move insn are checked. */
17072 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17073 {
17074 dest = copy_rtx (dest);
17075 src = copy_rtx (src);
17076 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17077 PATTERN (prev) = gen_rtx_SET (dest, src);
17078 INSN_CODE (prev) = -1;
17079 /* Set test register in INSN to dest. */
17080 XEXP (cmp, 0) = copy_rtx (dest);
17081 INSN_CODE (insn) = -1;
17082 }
17083 }
17084 }
17085
17086 /* Convert instructions to their cc-clobbering variant if possible, since
17087 that allows us to use smaller encodings. */
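
/* An illustrative sketch (not from the original sources): an RTL set such as
   (set (reg r0) (plus (reg r1) (reg r2))) is wrapped in a PARALLEL with a
   clobber of the condition-code register, turning the 32-bit
   "add r0, r1, r2" into the 16-bit flag-setting "adds r0, r1, r2" when the
   operands are low registers and the flags are dead at that point.  */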
17088
17089 static void
17090 thumb2_reorg (void)
17091 {
17092 basic_block bb;
17093 regset_head live;
17094
17095 INIT_REG_SET (&live);
17096
17097 /* We are freeing block_for_insn in the toplev to keep compatibility
17098 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17099 compute_bb_for_insn ();
17100 df_analyze ();
17101
17102 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17103
17104 FOR_EACH_BB_FN (bb, cfun)
17105 {
17106 if ((current_tune->disparage_flag_setting_t16_encodings
17107 == tune_params::DISPARAGE_FLAGS_ALL)
17108 && optimize_bb_for_speed_p (bb))
17109 continue;
17110
17111 rtx_insn *insn;
17112 Convert_Action action = SKIP;
17113 Convert_Action action_for_partial_flag_setting
17114 = ((current_tune->disparage_flag_setting_t16_encodings
17115 != tune_params::DISPARAGE_FLAGS_NEITHER)
17116 && optimize_bb_for_speed_p (bb))
17117 ? SKIP : CONV;
17118
17119 COPY_REG_SET (&live, DF_LR_OUT (bb));
17120 df_simulate_initialize_backwards (bb, &live);
17121 FOR_BB_INSNS_REVERSE (bb, insn)
17122 {
17123 if (NONJUMP_INSN_P (insn)
17124 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17125 && GET_CODE (PATTERN (insn)) == SET)
17126 {
17127 action = SKIP;
17128 rtx pat = PATTERN (insn);
17129 rtx dst = XEXP (pat, 0);
17130 rtx src = XEXP (pat, 1);
17131 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17132
17133 if (UNARY_P (src) || BINARY_P (src))
17134 op0 = XEXP (src, 0);
17135
17136 if (BINARY_P (src))
17137 op1 = XEXP (src, 1);
17138
17139 if (low_register_operand (dst, SImode))
17140 {
17141 switch (GET_CODE (src))
17142 {
17143 case PLUS:
17144 /* Adding two registers and storing the result
17145 in the first source is already a 16-bit
17146 operation. */
17147 if (rtx_equal_p (dst, op0)
17148 && register_operand (op1, SImode))
17149 break;
17150
17151 if (low_register_operand (op0, SImode))
17152 {
17153 /* ADDS <Rd>,<Rn>,<Rm> */
17154 if (low_register_operand (op1, SImode))
17155 action = CONV;
17156 /* ADDS <Rdn>,#<imm8> */
17157 /* SUBS <Rdn>,#<imm8> */
17158 else if (rtx_equal_p (dst, op0)
17159 && CONST_INT_P (op1)
17160 && IN_RANGE (INTVAL (op1), -255, 255))
17161 action = CONV;
17162 /* ADDS <Rd>,<Rn>,#<imm3> */
17163 /* SUBS <Rd>,<Rn>,#<imm3> */
17164 else if (CONST_INT_P (op1)
17165 && IN_RANGE (INTVAL (op1), -7, 7))
17166 action = CONV;
17167 }
17168 /* ADCS <Rd>, <Rn> */
17169 else if (GET_CODE (XEXP (src, 0)) == PLUS
17170 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17171 && low_register_operand (XEXP (XEXP (src, 0), 1),
17172 SImode)
17173 && COMPARISON_P (op1)
17174 && cc_register (XEXP (op1, 0), VOIDmode)
17175 && maybe_get_arm_condition_code (op1) == ARM_CS
17176 && XEXP (op1, 1) == const0_rtx)
17177 action = CONV;
17178 break;
17179
17180 case MINUS:
17181 /* RSBS <Rd>,<Rn>,#0
17182 Not handled here: see NEG below. */
17183 /* SUBS <Rd>,<Rn>,#<imm3>
17184 SUBS <Rdn>,#<imm8>
17185 Not handled here: see PLUS above. */
17186 /* SUBS <Rd>,<Rn>,<Rm> */
17187 if (low_register_operand (op0, SImode)
17188 && low_register_operand (op1, SImode))
17189 action = CONV;
17190 break;
17191
17192 case MULT:
17193 /* MULS <Rdm>,<Rn>,<Rdm>
17194 As an exception to the rule, this is only used
17195 when optimizing for size since MULS is slow on all
17196 known implementations. We do not even want to use
17197 MULS in cold code, if optimizing for speed, so we
17198 test the global flag here. */
17199 if (!optimize_size)
17200 break;
17201 /* Fall through. */
17202 case AND:
17203 case IOR:
17204 case XOR:
17205 /* ANDS <Rdn>,<Rm> */
17206 if (rtx_equal_p (dst, op0)
17207 && low_register_operand (op1, SImode))
17208 action = action_for_partial_flag_setting;
17209 else if (rtx_equal_p (dst, op1)
17210 && low_register_operand (op0, SImode))
17211 action = action_for_partial_flag_setting == SKIP
17212 ? SKIP : SWAP_CONV;
17213 break;
17214
17215 case ASHIFTRT:
17216 case ASHIFT:
17217 case LSHIFTRT:
17218 /* ASRS <Rdn>,<Rm> */
17219 /* LSRS <Rdn>,<Rm> */
17220 /* LSLS <Rdn>,<Rm> */
17221 if (rtx_equal_p (dst, op0)
17222 && low_register_operand (op1, SImode))
17223 action = action_for_partial_flag_setting;
17224 /* ASRS <Rd>,<Rm>,#<imm5> */
17225 /* LSRS <Rd>,<Rm>,#<imm5> */
17226 /* LSLS <Rd>,<Rm>,#<imm5> */
17227 else if (low_register_operand (op0, SImode)
17228 && CONST_INT_P (op1)
17229 && IN_RANGE (INTVAL (op1), 0, 31))
17230 action = action_for_partial_flag_setting;
17231 break;
17232
17233 case ROTATERT:
17234 /* RORS <Rdn>,<Rm> */
17235 if (rtx_equal_p (dst, op0)
17236 && low_register_operand (op1, SImode))
17237 action = action_for_partial_flag_setting;
17238 break;
17239
17240 case NOT:
17241 /* MVNS <Rd>,<Rm> */
17242 if (low_register_operand (op0, SImode))
17243 action = action_for_partial_flag_setting;
17244 break;
17245
17246 case NEG:
17247 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17248 if (low_register_operand (op0, SImode))
17249 action = CONV;
17250 break;
17251
17252 case CONST_INT:
17253 /* MOVS <Rd>,#<imm8> */
17254 if (CONST_INT_P (src)
17255 && IN_RANGE (INTVAL (src), 0, 255))
17256 action = action_for_partial_flag_setting;
17257 break;
17258
17259 case REG:
17260 /* MOVS and MOV<c> with registers have different
17261 encodings, so are not relevant here. */
17262 break;
17263
17264 default:
17265 break;
17266 }
17267 }
17268
17269 if (action != SKIP)
17270 {
17271 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17272 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17273 rtvec vec;
17274
17275 if (action == SWAP_CONV)
17276 {
17277 src = copy_rtx (src);
17278 XEXP (src, 0) = op1;
17279 XEXP (src, 1) = op0;
17280 pat = gen_rtx_SET (dst, src);
17281 vec = gen_rtvec (2, pat, clobber);
17282 }
17283 else /* action == CONV */
17284 vec = gen_rtvec (2, pat, clobber);
17285
17286 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17287 INSN_CODE (insn) = -1;
17288 }
17289 }
17290
17291 if (NONDEBUG_INSN_P (insn))
17292 df_simulate_one_insn_backwards (bb, insn, &live);
17293 }
17294 }
17295
17296 CLEAR_REG_SET (&live);
17297 }
17298
17299 /* Gcc puts the pool in the wrong place for ARM, since we can only
17300 load addresses a limited distance around the pc. We do some
17301 special munging to move the constant pool values to the correct
17302 point in the code. */
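
/* An illustrative sketch (not from the original sources): a constant that
   cannot be encoded as an immediate is loaded PC-relatively, e.g.

	ldr	r0, .L42
	...
   .L42:
	.word	305419896	@ 0x12345678

   and this pass decides where each such .L42 (a "minipool") is placed so
   that it stays within the load's addressing range.  */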
17303 static void
17304 arm_reorg (void)
17305 {
17306 rtx_insn *insn;
17307 HOST_WIDE_INT address = 0;
17308 Mfix * fix;
17309
17310 if (use_cmse)
17311 cmse_nonsecure_call_clear_caller_saved ();
17312 if (TARGET_THUMB1)
17313 thumb1_reorg ();
17314 else if (TARGET_THUMB2)
17315 thumb2_reorg ();
17316
17317 /* Ensure all insns that must be split have been split at this point.
17318 Otherwise, the pool placement code below may compute incorrect
17319 insn lengths. Note that when optimizing, all insns have already
17320 been split at this point. */
17321 if (!optimize)
17322 split_all_insns_noflow ();
17323
17324 minipool_fix_head = minipool_fix_tail = NULL;
17325
17326 /* The first insn must always be a note, or the code below won't
17327 scan it properly. */
17328 insn = get_insns ();
17329 gcc_assert (NOTE_P (insn));
17330 minipool_pad = 0;
17331
17332 /* Scan all the insns and record the operands that will need fixing. */
17333 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17334 {
17335 if (BARRIER_P (insn))
17336 push_minipool_barrier (insn, address);
17337 else if (INSN_P (insn))
17338 {
17339 rtx_jump_table_data *table;
17340
17341 note_invalid_constants (insn, address, true);
17342 address += get_attr_length (insn);
17343
17344 /* If the insn is a vector jump, add the size of the table
17345 and skip the table. */
17346 if (tablejump_p (insn, NULL, &table))
17347 {
17348 address += get_jump_table_size (table);
17349 insn = table;
17350 }
17351 }
17352 else if (LABEL_P (insn))
17353 /* Add the worst-case padding due to alignment. We don't add
17354 the _current_ padding because the minipool insertions
17355 themselves might change it. */
17356 address += get_label_padding (insn);
17357 }
17358
17359 fix = minipool_fix_head;
17360
17361 /* Now scan the fixups and perform the required changes. */
17362 while (fix)
17363 {
17364 Mfix * ftmp;
17365 Mfix * fdel;
17366 Mfix * last_added_fix;
17367 Mfix * last_barrier = NULL;
17368 Mfix * this_fix;
17369
17370 /* Skip any further barriers before the next fix. */
17371 while (fix && BARRIER_P (fix->insn))
17372 fix = fix->next;
17373
17374 /* No more fixes. */
17375 if (fix == NULL)
17376 break;
17377
17378 last_added_fix = NULL;
17379
17380 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17381 {
17382 if (BARRIER_P (ftmp->insn))
17383 {
17384 if (ftmp->address >= minipool_vector_head->max_address)
17385 break;
17386
17387 last_barrier = ftmp;
17388 }
17389 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17390 break;
17391
17392 last_added_fix = ftmp; /* Keep track of the last fix added. */
17393 }
17394
17395 /* If we found a barrier, drop back to that; any fixes that we
17396 could have reached but come after the barrier will now go in
17397 the next mini-pool. */
17398 if (last_barrier != NULL)
17399 {
17400 /* Reduce the refcount for those fixes that won't go into this
17401 pool after all. */
17402 for (fdel = last_barrier->next;
17403 fdel && fdel != ftmp;
17404 fdel = fdel->next)
17405 {
17406 fdel->minipool->refcount--;
17407 fdel->minipool = NULL;
17408 }
17409
17410 ftmp = last_barrier;
17411 }
17412 else
17413 {
17414 	  /* ftmp is the first fix that we can't fit into this pool
17415 	     and there are no natural barriers that we could use.  Insert a
17416 new barrier in the code somewhere between the previous
17417 fix and this one, and arrange to jump around it. */
17418 HOST_WIDE_INT max_address;
17419
17420 /* The last item on the list of fixes must be a barrier, so
17421 we can never run off the end of the list of fixes without
17422 last_barrier being set. */
17423 gcc_assert (ftmp);
17424
17425 max_address = minipool_vector_head->max_address;
17426 /* Check that there isn't another fix that is in range that
17427 we couldn't fit into this pool because the pool was
17428 already too large: we need to put the pool before such an
17429 instruction. The pool itself may come just after the
17430 fix because create_fix_barrier also allows space for a
17431 jump instruction. */
17432 if (ftmp->address < max_address)
17433 max_address = ftmp->address + 1;
17434
17435 last_barrier = create_fix_barrier (last_added_fix, max_address);
17436 }
17437
17438 assign_minipool_offsets (last_barrier);
17439
17440 while (ftmp)
17441 {
17442 if (!BARRIER_P (ftmp->insn)
17443 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17444 == NULL))
17445 break;
17446
17447 ftmp = ftmp->next;
17448 }
17449
17450 /* Scan over the fixes we have identified for this pool, fixing them
17451 up and adding the constants to the pool itself. */
17452 for (this_fix = fix; this_fix && ftmp != this_fix;
17453 this_fix = this_fix->next)
17454 if (!BARRIER_P (this_fix->insn))
17455 {
17456 rtx addr
17457 = plus_constant (Pmode,
17458 gen_rtx_LABEL_REF (VOIDmode,
17459 minipool_vector_label),
17460 this_fix->minipool->offset);
17461 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17462 }
17463
17464 dump_minipool (last_barrier->insn);
17465 fix = ftmp;
17466 }
17467
17468 /* From now on we must synthesize any constants that we can't handle
17469 directly. This can happen if the RTL gets split during final
17470 instruction generation. */
17471 cfun->machine->after_arm_reorg = 1;
17472
17473 /* Free the minipool memory. */
17474 obstack_free (&minipool_obstack, minipool_startobj);
17475 }
17476 \f
17477 /* Routines to output assembly language. */
17478
17479 /* Return the string representation of the passed-in real value.  */
17480 static const char *
17481 fp_const_from_val (REAL_VALUE_TYPE *r)
17482 {
17483 if (!fp_consts_inited)
17484 init_fp_table ();
17485
17486 gcc_assert (real_equal (r, &value_fp0));
17487 return "0";
17488 }
17489
17490 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17491    OPERANDS[1] is the base register, RETURN_PC is true iff the return
17492    insn is in the list, and UPDATE is true iff the list contains an
17493    explicit update of the base register.  */
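
/* For instance (an illustrative sketch, not from the original sources), a
   {r4, r5, pc} load with SP as the base register and an explicit update is
   printed as "pop {r4, r5, pc}", whereas a non-SP base without update is
   printed as "ldm rN, {r4, r5, pc}".  */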
17494 void
17495 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17496 bool update)
17497 {
17498 int i;
17499 char pattern[100];
17500 int offset;
17501 const char *conditional;
17502 int num_saves = XVECLEN (operands[0], 0);
17503 unsigned int regno;
17504 unsigned int regno_base = REGNO (operands[1]);
17505 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17506
17507 offset = 0;
17508 offset += update ? 1 : 0;
17509 offset += return_pc ? 1 : 0;
17510
17511 /* Is the base register in the list? */
17512 for (i = offset; i < num_saves; i++)
17513 {
17514 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17515 /* If SP is in the list, then the base register must be SP. */
17516 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17517 /* If base register is in the list, there must be no explicit update. */
17518 if (regno == regno_base)
17519 gcc_assert (!update);
17520 }
17521
17522 conditional = reverse ? "%?%D0" : "%?%d0";
17523 /* Can't use POP if returning from an interrupt. */
17524 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17525 sprintf (pattern, "pop%s\t{", conditional);
17526 else
17527 {
17528 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17529 It's just a convention, their semantics are identical. */
17530 if (regno_base == SP_REGNUM)
17531 sprintf (pattern, "ldmfd%s\t", conditional);
17532 else if (update)
17533 sprintf (pattern, "ldmia%s\t", conditional);
17534 else
17535 sprintf (pattern, "ldm%s\t", conditional);
17536
17537 strcat (pattern, reg_names[regno_base]);
17538 if (update)
17539 strcat (pattern, "!, {");
17540 else
17541 strcat (pattern, ", {");
17542 }
17543
17544 /* Output the first destination register. */
17545 strcat (pattern,
17546 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17547
17548 /* Output the rest of the destination registers. */
17549 for (i = offset + 1; i < num_saves; i++)
17550 {
17551 strcat (pattern, ", ");
17552 strcat (pattern,
17553 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17554 }
17555
17556 strcat (pattern, "}");
17557
17558 if (interrupt_p && return_pc)
17559 strcat (pattern, "^");
17560
17561 output_asm_insn (pattern, &cond);
17562 }
17563
17564
17565 /* Output the assembly for a store multiple. */
17566
17567 const char *
17568 vfp_output_vstmd (rtx * operands)
17569 {
17570 char pattern[100];
17571 int p;
17572 int base;
17573 int i;
17574 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17575 ? XEXP (operands[0], 0)
17576 : XEXP (XEXP (operands[0], 0), 0);
17577 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17578
17579 if (push_p)
17580 strcpy (pattern, "vpush%?.64\t{%P1");
17581 else
17582 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17583
17584 p = strlen (pattern);
17585
17586 gcc_assert (REG_P (operands[1]));
17587
17588 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17589 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17590 {
17591 p += sprintf (&pattern[p], ", d%d", base + i);
17592 }
17593 strcpy (&pattern[p], "}");
17594
17595 output_asm_insn (pattern, operands);
17596 return "";
17597 }
17598
17599
17600 /* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
17601 number of bytes pushed. */
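
/* For instance (an illustrative sketch, not from the original sources),
   asking for four D registers starting at d0 pushes d0-d3 with a single
   store-multiple (typically printed as a vpush of {d0-d3}) and returns 32,
   the number of bytes of stack used.  */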
17602
17603 static int
17604 vfp_emit_fstmd (int base_reg, int count)
17605 {
17606 rtx par;
17607 rtx dwarf;
17608 rtx tmp, reg;
17609 int i;
17610
17611   /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
17612 register pairs are stored by a store multiple insn. We avoid this
17613 by pushing an extra pair. */
17614 if (count == 2 && !arm_arch6)
17615 {
17616 if (base_reg == LAST_VFP_REGNUM - 3)
17617 base_reg -= 2;
17618 count++;
17619 }
17620
17621 /* FSTMD may not store more than 16 doubleword registers at once. Split
17622 larger stores into multiple parts (up to a maximum of two, in
17623 practice). */
17624 if (count > 16)
17625 {
17626 int saved;
17627 /* NOTE: base_reg is an internal register number, so each D register
17628 counts as 2. */
17629 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17630 saved += vfp_emit_fstmd (base_reg, 16);
17631 return saved;
17632 }
17633
17634 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17635 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17636
17637 reg = gen_rtx_REG (DFmode, base_reg);
17638 base_reg += 2;
17639
17640 XVECEXP (par, 0, 0)
17641 = gen_rtx_SET (gen_frame_mem
17642 (BLKmode,
17643 gen_rtx_PRE_MODIFY (Pmode,
17644 stack_pointer_rtx,
17645 plus_constant
17646 (Pmode, stack_pointer_rtx,
17647 - (count * 8)))
17648 ),
17649 gen_rtx_UNSPEC (BLKmode,
17650 gen_rtvec (1, reg),
17651 UNSPEC_PUSH_MULT));
17652
17653 tmp = gen_rtx_SET (stack_pointer_rtx,
17654 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17655 RTX_FRAME_RELATED_P (tmp) = 1;
17656 XVECEXP (dwarf, 0, 0) = tmp;
17657
17658 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17659 RTX_FRAME_RELATED_P (tmp) = 1;
17660 XVECEXP (dwarf, 0, 1) = tmp;
17661
17662 for (i = 1; i < count; i++)
17663 {
17664 reg = gen_rtx_REG (DFmode, base_reg);
17665 base_reg += 2;
17666 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17667
17668 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17669 plus_constant (Pmode,
17670 stack_pointer_rtx,
17671 i * 8)),
17672 reg);
17673 RTX_FRAME_RELATED_P (tmp) = 1;
17674 XVECEXP (dwarf, 0, i + 1) = tmp;
17675 }
17676
17677 par = emit_insn (par);
17678 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17679 RTX_FRAME_RELATED_P (par) = 1;
17680
17681 return count * 8;
17682 }
17683
17684 /* Return true if -mcmse has been passed and the function pointed to by
17685    'addr' has the cmse_nonsecure_call attribute; return false otherwise.  */
17686
17687 bool
17688 detect_cmse_nonsecure_call (tree addr)
17689 {
17690 if (!addr)
17691 return FALSE;
17692
17693 tree fntype = TREE_TYPE (addr);
17694 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17695 TYPE_ATTRIBUTES (fntype)))
17696 return TRUE;
17697 return FALSE;
17698 }
17699
17700
17701 /* Emit a call instruction with pattern PAT. ADDR is the address of
17702 the call target. */
17703
17704 void
17705 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17706 {
17707 rtx insn;
17708
17709 insn = emit_call_insn (pat);
17710
17711 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17712 If the call might use such an entry, add a use of the PIC register
17713 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17714 if (TARGET_VXWORKS_RTP
17715 && flag_pic
17716 && !sibcall
17717 && GET_CODE (addr) == SYMBOL_REF
17718 && (SYMBOL_REF_DECL (addr)
17719 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17720 : !SYMBOL_REF_LOCAL_P (addr)))
17721 {
17722 require_pic_register ();
17723 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17724 }
17725
17726 if (TARGET_AAPCS_BASED)
17727 {
17728 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17729 linker. We need to add an IP clobber to allow setting
17730 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17731 is not needed since it's a fixed register. */
17732 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17733 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17734 }
17735 }
17736
17737 /* Output a 'call' insn. */
17738 const char *
17739 output_call (rtx *operands)
17740 {
17741 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17742
17743 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17744 if (REGNO (operands[0]) == LR_REGNUM)
17745 {
17746 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17747 output_asm_insn ("mov%?\t%0, %|lr", operands);
17748 }
17749
17750 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17751
17752 if (TARGET_INTERWORK || arm_arch4t)
17753 output_asm_insn ("bx%?\t%0", operands);
17754 else
17755 output_asm_insn ("mov%?\t%|pc, %0", operands);
17756
17757 return "";
17758 }
17759
17760 /* Output a move of a long double from ARM registers to ARM registers.
17761 OPERANDS[0] is the destination.
17762 OPERANDS[1] is the source. */
17763 const char *
17764 output_mov_long_double_arm_from_arm (rtx *operands)
17765 {
17766 /* We have to be careful here because the two might overlap. */
17767 int dest_start = REGNO (operands[0]);
17768 int src_start = REGNO (operands[1]);
17769 rtx ops[2];
17770 int i;
17771
17772 if (dest_start < src_start)
17773 {
17774 for (i = 0; i < 3; i++)
17775 {
17776 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17777 ops[1] = gen_rtx_REG (SImode, src_start + i);
17778 output_asm_insn ("mov%?\t%0, %1", ops);
17779 }
17780 }
17781 else
17782 {
17783 for (i = 2; i >= 0; i--)
17784 {
17785 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17786 ops[1] = gen_rtx_REG (SImode, src_start + i);
17787 output_asm_insn ("mov%?\t%0, %1", ops);
17788 }
17789 }
17790
17791 return "";
17792 }
17793
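/* Emit the two SImode sets that load SRC into DEST as a movw/movt style
   pair.  As an illustrative sketch (not from the original sources), a
   constant such as 0x12345678 is emitted as a set of the low 16 bits
   (0x5678, a movw) followed by a zero_extract set of the high 16 bits
   (0x1234, a movt); symbolic values use a HIGH/LO_SUM pair instead.  */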
17794 void
17795 arm_emit_movpair (rtx dest, rtx src)
17796 {
17797 /* If the src is an immediate, simplify it. */
17798 if (CONST_INT_P (src))
17799 {
17800 HOST_WIDE_INT val = INTVAL (src);
17801 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17802 if ((val >> 16) & 0x0000ffff)
17803 {
17804 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17805 GEN_INT (16)),
17806 GEN_INT ((val >> 16) & 0x0000ffff));
17807 rtx_insn *insn = get_last_insn ();
17808 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17809 }
17810 return;
17811 }
17812 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17813 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17814 rtx_insn *insn = get_last_insn ();
17815 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17816 }
17817
17818 /* Output a move between double words. It must be REG<-MEM
17819 or MEM<-REG. */
17820 const char *
17821 output_move_double (rtx *operands, bool emit, int *count)
17822 {
17823 enum rtx_code code0 = GET_CODE (operands[0]);
17824 enum rtx_code code1 = GET_CODE (operands[1]);
17825 rtx otherops[3];
17826 if (count)
17827 *count = 1;
17828
17829 /* The only case when this might happen is when
17830 you are looking at the length of a DImode instruction
17831 that has an invalid constant in it. */
17832 if (code0 == REG && code1 != MEM)
17833 {
17834 gcc_assert (!emit);
17835 *count = 2;
17836 return "";
17837 }
17838
17839 if (code0 == REG)
17840 {
17841 unsigned int reg0 = REGNO (operands[0]);
17842
17843 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17844
17845 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17846
17847 switch (GET_CODE (XEXP (operands[1], 0)))
17848 {
17849 case REG:
17850
17851 if (emit)
17852 {
17853 if (TARGET_LDRD
17854 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17855 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17856 else
17857 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17858 }
17859 break;
17860
17861 case PRE_INC:
17862 gcc_assert (TARGET_LDRD);
17863 if (emit)
17864 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17865 break;
17866
17867 case PRE_DEC:
17868 if (emit)
17869 {
17870 if (TARGET_LDRD)
17871 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17872 else
17873 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17874 }
17875 break;
17876
17877 case POST_INC:
17878 if (emit)
17879 {
17880 if (TARGET_LDRD)
17881 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17882 else
17883 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17884 }
17885 break;
17886
17887 case POST_DEC:
17888 gcc_assert (TARGET_LDRD);
17889 if (emit)
17890 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17891 break;
17892
17893 case PRE_MODIFY:
17894 case POST_MODIFY:
17895 	  /* Autoincrement addressing modes should never have overlapping
17896 base and destination registers, and overlapping index registers
17897 are already prohibited, so this doesn't need to worry about
17898 fix_cm3_ldrd. */
17899 otherops[0] = operands[0];
17900 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17901 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17902
17903 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17904 {
17905 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17906 {
17907 /* Registers overlap so split out the increment. */
17908 if (emit)
17909 {
17910 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17911 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17912 }
17913 if (count)
17914 *count = 2;
17915 }
17916 else
17917 {
17918 /* Use a single insn if we can.
17919 FIXME: IWMMXT allows offsets larger than ldrd can
17920 handle, fix these up with a pair of ldr. */
17921 if (TARGET_THUMB2
17922 || !CONST_INT_P (otherops[2])
17923 || (INTVAL (otherops[2]) > -256
17924 && INTVAL (otherops[2]) < 256))
17925 {
17926 if (emit)
17927 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17928 }
17929 else
17930 {
17931 if (emit)
17932 {
17933 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17934 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17935 }
17936 if (count)
17937 *count = 2;
17938
17939 }
17940 }
17941 }
17942 else
17943 {
17944 /* Use a single insn if we can.
17945 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17946 fix these up with a pair of ldr. */
17947 if (TARGET_THUMB2
17948 || !CONST_INT_P (otherops[2])
17949 || (INTVAL (otherops[2]) > -256
17950 && INTVAL (otherops[2]) < 256))
17951 {
17952 if (emit)
17953 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
17954 }
17955 else
17956 {
17957 if (emit)
17958 {
17959 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17960 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17961 }
17962 if (count)
17963 *count = 2;
17964 }
17965 }
17966 break;
17967
17968 case LABEL_REF:
17969 case CONST:
17970 /* We might be able to use ldrd %0, %1 here. However the range is
17971 different to ldr/adr, and it is broken on some ARMv7-M
17972 implementations. */
17973 /* Use the second register of the pair to avoid problematic
17974 overlap. */
17975 otherops[1] = operands[1];
17976 if (emit)
17977 output_asm_insn ("adr%?\t%0, %1", otherops);
17978 operands[1] = otherops[0];
17979 if (emit)
17980 {
17981 if (TARGET_LDRD)
17982 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
17983 else
17984 output_asm_insn ("ldmia%?\t%1, %M0", operands);
17985 }
17986
17987 if (count)
17988 *count = 2;
17989 break;
17990
17991 /* ??? This needs checking for thumb2. */
17992 default:
17993 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17994 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17995 {
17996 otherops[0] = operands[0];
17997 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17998 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17999
18000 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18001 {
18002 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18003 {
18004 switch ((int) INTVAL (otherops[2]))
18005 {
18006 case -8:
18007 if (emit)
18008 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18009 return "";
18010 case -4:
18011 if (TARGET_THUMB2)
18012 break;
18013 if (emit)
18014 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18015 return "";
18016 case 4:
18017 if (TARGET_THUMB2)
18018 break;
18019 if (emit)
18020 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18021 return "";
18022 }
18023 }
18024 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18025 operands[1] = otherops[0];
18026 if (TARGET_LDRD
18027 && (REG_P (otherops[2])
18028 || TARGET_THUMB2
18029 || (CONST_INT_P (otherops[2])
18030 && INTVAL (otherops[2]) > -256
18031 && INTVAL (otherops[2]) < 256)))
18032 {
18033 if (reg_overlap_mentioned_p (operands[0],
18034 otherops[2]))
18035 {
18036 /* Swap base and index registers over to
18037 avoid a conflict. */
18038 std::swap (otherops[1], otherops[2]);
18039 }
18040 /* If both registers conflict, it will usually
18041 have been fixed by a splitter. */
18042 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18043 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18044 {
18045 if (emit)
18046 {
18047 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18048 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18049 }
18050 if (count)
18051 *count = 2;
18052 }
18053 else
18054 {
18055 otherops[0] = operands[0];
18056 if (emit)
18057 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18058 }
18059 return "";
18060 }
18061
18062 if (CONST_INT_P (otherops[2]))
18063 {
18064 if (emit)
18065 {
18066 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18067 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18068 else
18069 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18070 }
18071 }
18072 else
18073 {
18074 if (emit)
18075 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18076 }
18077 }
18078 else
18079 {
18080 if (emit)
18081 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18082 }
18083
18084 if (count)
18085 *count = 2;
18086
18087 if (TARGET_LDRD)
18088 return "ldrd%?\t%0, [%1]";
18089
18090 return "ldmia%?\t%1, %M0";
18091 }
18092 else
18093 {
18094 otherops[1] = adjust_address (operands[1], SImode, 4);
18095 /* Take care of overlapping base/data reg. */
18096 if (reg_mentioned_p (operands[0], operands[1]))
18097 {
18098 if (emit)
18099 {
18100 output_asm_insn ("ldr%?\t%0, %1", otherops);
18101 output_asm_insn ("ldr%?\t%0, %1", operands);
18102 }
18103 if (count)
18104 *count = 2;
18105
18106 }
18107 else
18108 {
18109 if (emit)
18110 {
18111 output_asm_insn ("ldr%?\t%0, %1", operands);
18112 output_asm_insn ("ldr%?\t%0, %1", otherops);
18113 }
18114 if (count)
18115 *count = 2;
18116 }
18117 }
18118 }
18119 }
18120 else
18121 {
18122 /* Constraints should ensure this. */
18123 gcc_assert (code0 == MEM && code1 == REG);
18124 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18125 || (TARGET_ARM && TARGET_LDRD));
18126
18127 switch (GET_CODE (XEXP (operands[0], 0)))
18128 {
18129 case REG:
18130 if (emit)
18131 {
18132 if (TARGET_LDRD)
18133 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18134 else
18135 output_asm_insn ("stm%?\t%m0, %M1", operands);
18136 }
18137 break;
18138
18139 case PRE_INC:
18140 gcc_assert (TARGET_LDRD);
18141 if (emit)
18142 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18143 break;
18144
18145 case PRE_DEC:
18146 if (emit)
18147 {
18148 if (TARGET_LDRD)
18149 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18150 else
18151 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18152 }
18153 break;
18154
18155 case POST_INC:
18156 if (emit)
18157 {
18158 if (TARGET_LDRD)
18159 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18160 else
18161 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18162 }
18163 break;
18164
18165 case POST_DEC:
18166 gcc_assert (TARGET_LDRD);
18167 if (emit)
18168 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18169 break;
18170
18171 case PRE_MODIFY:
18172 case POST_MODIFY:
18173 otherops[0] = operands[1];
18174 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18175 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18176
18177 /* IWMMXT allows offsets larger than strd can handle,
18178 fix these up with a pair of str. */
18179 if (!TARGET_THUMB2
18180 && CONST_INT_P (otherops[2])
18181 && (INTVAL (otherops[2]) <= -256
18182 || INTVAL (otherops[2]) >= 256))
18183 {
18184 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18185 {
18186 if (emit)
18187 {
18188 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18189 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18190 }
18191 if (count)
18192 *count = 2;
18193 }
18194 else
18195 {
18196 if (emit)
18197 {
18198 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18199 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18200 }
18201 if (count)
18202 *count = 2;
18203 }
18204 }
18205 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18206 {
18207 if (emit)
18208 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18209 }
18210 else
18211 {
18212 if (emit)
18213 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18214 }
18215 break;
18216
18217 case PLUS:
18218 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18219 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18220 {
18221 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18222 {
18223 case -8:
18224 if (emit)
18225 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18226 return "";
18227
18228 case -4:
18229 if (TARGET_THUMB2)
18230 break;
18231 if (emit)
18232 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18233 return "";
18234
18235 case 4:
18236 if (TARGET_THUMB2)
18237 break;
18238 if (emit)
18239 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18240 return "";
18241 }
18242 }
18243 if (TARGET_LDRD
18244 && (REG_P (otherops[2])
18245 || TARGET_THUMB2
18246 || (CONST_INT_P (otherops[2])
18247 && INTVAL (otherops[2]) > -256
18248 && INTVAL (otherops[2]) < 256)))
18249 {
18250 otherops[0] = operands[1];
18251 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18252 if (emit)
18253 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18254 return "";
18255 }
18256 /* Fall through */
18257
18258 default:
18259 otherops[0] = adjust_address (operands[0], SImode, 4);
18260 otherops[1] = operands[1];
18261 if (emit)
18262 {
18263 output_asm_insn ("str%?\t%1, %0", operands);
18264 output_asm_insn ("str%?\t%H1, %0", otherops);
18265 }
18266 if (count)
18267 *count = 2;
18268 }
18269 }
18270
18271 return "";
18272 }
18273
18274 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18275 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
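/* As a sketch (registers are illustrative): a load from a register address
   becomes an ldmia of the four-word group, roughly "ldmia r4, {r0-r3}", a
   store becomes the corresponding "stm", and a register-to-register move
   expands to four "mov" instructions, ordered so that no source register
   is overwritten before it has been read.  */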
18276
18277 const char *
18278 output_move_quad (rtx *operands)
18279 {
18280 if (REG_P (operands[0]))
18281 {
18282 /* Load, or reg->reg move. */
18283
18284 if (MEM_P (operands[1]))
18285 {
18286 switch (GET_CODE (XEXP (operands[1], 0)))
18287 {
18288 case REG:
18289 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18290 break;
18291
18292 case LABEL_REF:
18293 case CONST:
18294 output_asm_insn ("adr%?\t%0, %1", operands);
18295 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18296 break;
18297
18298 default:
18299 gcc_unreachable ();
18300 }
18301 }
18302 else
18303 {
18304 rtx ops[2];
18305 int dest, src, i;
18306
18307 gcc_assert (REG_P (operands[1]));
18308
18309 dest = REGNO (operands[0]);
18310 src = REGNO (operands[1]);
18311
18312 /* This seems pretty dumb, but hopefully GCC won't try to do it
18313 very often. */
18314 if (dest < src)
18315 for (i = 0; i < 4; i++)
18316 {
18317 ops[0] = gen_rtx_REG (SImode, dest + i);
18318 ops[1] = gen_rtx_REG (SImode, src + i);
18319 output_asm_insn ("mov%?\t%0, %1", ops);
18320 }
18321 else
18322 for (i = 3; i >= 0; i--)
18323 {
18324 ops[0] = gen_rtx_REG (SImode, dest + i);
18325 ops[1] = gen_rtx_REG (SImode, src + i);
18326 output_asm_insn ("mov%?\t%0, %1", ops);
18327 }
18328 }
18329 }
18330 else
18331 {
18332 gcc_assert (MEM_P (operands[0]));
18333 gcc_assert (REG_P (operands[1]));
18334 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18335
18336 switch (GET_CODE (XEXP (operands[0], 0)))
18337 {
18338 case REG:
18339 output_asm_insn ("stm%?\t%m0, %M1", operands);
18340 break;
18341
18342 default:
18343 gcc_unreachable ();
18344 }
18345 }
18346
18347 return "";
18348 }
18349
18350 /* Output a VFP load or store instruction. */
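/* The instruction printed is a vldr/vstr, or a vldmia/vstmia resp.
   vldmdb/vstmdb for post-increment and pre-decrement addresses, with a
   .64, .32 or .16 size suffix.  For example (operands are illustrative),
   a DFmode load from [r1] prints as "vldr.64 d0, [r1]" and an SFmode
   post-increment store as "vstmia.32 r1!, {s0}".  */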
18351
18352 const char *
18353 output_move_vfp (rtx *operands)
18354 {
18355 rtx reg, mem, addr, ops[2];
18356 int load = REG_P (operands[0]);
18357 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18358 int sp = (!TARGET_VFP_FP16INST
18359 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18360 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18361 const char *templ;
18362 char buff[50];
18363 machine_mode mode;
18364
18365 reg = operands[!load];
18366 mem = operands[load];
18367
18368 mode = GET_MODE (reg);
18369
18370 gcc_assert (REG_P (reg));
18371 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18372 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18373 || mode == SFmode
18374 || mode == DFmode
18375 || mode == HImode
18376 || mode == SImode
18377 || mode == DImode
18378 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18379 gcc_assert (MEM_P (mem));
18380
18381 addr = XEXP (mem, 0);
18382
18383 switch (GET_CODE (addr))
18384 {
18385 case PRE_DEC:
18386 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18387 ops[0] = XEXP (addr, 0);
18388 ops[1] = reg;
18389 break;
18390
18391 case POST_INC:
18392 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18393 ops[0] = XEXP (addr, 0);
18394 ops[1] = reg;
18395 break;
18396
18397 default:
18398 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18399 ops[0] = reg;
18400 ops[1] = mem;
18401 break;
18402 }
18403
18404 sprintf (buff, templ,
18405 load ? "ld" : "st",
18406 dp ? "64" : sp ? "32" : "16",
18407 dp ? "P" : "",
18408 integer_p ? "\t%@ int" : "");
18409 output_asm_insn (buff, ops);
18410
18411 return "";
18412 }
18413
18414 /* Output a Neon double-word or quad-word load or store, or a load
18415 or store for larger structure modes.
18416
18417 WARNING: The ordering of elements is weird in big-endian mode,
18418 because the EABI requires that vectors stored in memory appear
18419 as though they were stored by a VSTM instruction.
18420 GCC RTL defines element ordering based on in-memory order.
18421 This can be different from the architectural ordering of elements
18422 within a NEON register. The intrinsics defined in arm_neon.h use the
18423 NEON register element ordering, not the GCC RTL element ordering.
18424
18425 For example, the in-memory ordering of a big-endian quadword
18426 vector with 16-bit elements when stored from register pair {d0,d1}
18427 will be (lowest address first, d0[N] is NEON register element N):
18428
18429 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18430
18431 When necessary, quadword registers (dN, dN+1) are moved to ARM
18432 registers from rN in the order:
18433
18434 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18435
18436 So that STM/LDM can be used on vectors in ARM registers, and the
18437 same memory layout will result as if VSTM/VLDM were used.
18438
18439 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18440 possible, which allows use of appropriate alignment tags.
18441 Note that the choice of "64" is independent of the actual vector
18442 element size; this size simply ensures that the behavior is
18443 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18444
18445 Due to limitations of those instructions, use of VST1.64/VLD1.64
18446 is not possible if:
18447 - the address contains PRE_DEC, or
18448 - the mode refers to more than 4 double-word registers
18449
18450 In those cases, it would be possible to replace VSTM/VLDM by a
18451 sequence of instructions; this is not currently implemented since
18452 this is not certain to actually improve performance. */
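/* As a rough illustration (register choices are arbitrary), a quad-word
   load from a plain register address uses the alignment-capable form
   "vld1.64 {d0, d1}, [r2]", while a pre-decrement access or a structure
   mode needing more than four D registers falls back to vldmdb / vldmia
   (e.g. "vldmdb r2!, {d0, d1}" or "vldmia r2, {d0-d7}").  The LABEL_REF
   and PLUS cases below are instead split into one vldr or vstr per
   D register.  */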
18453
18454 const char *
18455 output_move_neon (rtx *operands)
18456 {
18457 rtx reg, mem, addr, ops[2];
18458 int regno, nregs, load = REG_P (operands[0]);
18459 const char *templ;
18460 char buff[50];
18461 machine_mode mode;
18462
18463 reg = operands[!load];
18464 mem = operands[load];
18465
18466 mode = GET_MODE (reg);
18467
18468 gcc_assert (REG_P (reg));
18469 regno = REGNO (reg);
18470 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18471 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18472 || NEON_REGNO_OK_FOR_QUAD (regno));
18473 gcc_assert (VALID_NEON_DREG_MODE (mode)
18474 || VALID_NEON_QREG_MODE (mode)
18475 || VALID_NEON_STRUCT_MODE (mode));
18476 gcc_assert (MEM_P (mem));
18477
18478 addr = XEXP (mem, 0);
18479
18480 /* Strip off const from addresses like (const (plus (...))). */
18481 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18482 addr = XEXP (addr, 0);
18483
18484 switch (GET_CODE (addr))
18485 {
18486 case POST_INC:
18487 /* We have to use vldm / vstm for too-large modes. */
18488 if (nregs > 4)
18489 {
18490 templ = "v%smia%%?\t%%0!, %%h1";
18491 ops[0] = XEXP (addr, 0);
18492 }
18493 else
18494 {
18495 templ = "v%s1.64\t%%h1, %%A0";
18496 ops[0] = mem;
18497 }
18498 ops[1] = reg;
18499 break;
18500
18501 case PRE_DEC:
18502 /* We have to use vldm / vstm in this case, since there is no
18503 pre-decrement form of the vld1 / vst1 instructions. */
18504 templ = "v%smdb%%?\t%%0!, %%h1";
18505 ops[0] = XEXP (addr, 0);
18506 ops[1] = reg;
18507 break;
18508
18509 case POST_MODIFY:
18510 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18511 gcc_unreachable ();
18512
18513 case REG:
18514 /* We have to use vldm / vstm for too-large modes. */
18515 if (nregs > 1)
18516 {
18517 if (nregs > 4)
18518 templ = "v%smia%%?\t%%m0, %%h1";
18519 else
18520 templ = "v%s1.64\t%%h1, %%A0";
18521
18522 ops[0] = mem;
18523 ops[1] = reg;
18524 break;
18525 }
18526 /* Fall through. */
18527 case LABEL_REF:
18528 case PLUS:
18529 {
18530 int i;
18531 int overlap = -1;
18532 for (i = 0; i < nregs; i++)
18533 {
18534 /* We're only using DImode here because it's a convenient size. */
18535 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18536 ops[1] = adjust_address (mem, DImode, 8 * i);
18537 if (reg_overlap_mentioned_p (ops[0], mem))
18538 {
18539 gcc_assert (overlap == -1);
18540 overlap = i;
18541 }
18542 else
18543 {
18544 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18545 output_asm_insn (buff, ops);
18546 }
18547 }
18548 if (overlap != -1)
18549 {
18550 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18551 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18552 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18553 output_asm_insn (buff, ops);
18554 }
18555
18556 return "";
18557 }
18558
18559 default:
18560 gcc_unreachable ();
18561 }
18562
18563 sprintf (buff, templ, load ? "ld" : "st");
18564 output_asm_insn (buff, ops);
18565
18566 return "";
18567 }
18568
18569 /* Compute and return the length of neon_mov<mode>, where <mode> is
18570 one of VSTRUCT modes: EI, OI, CI or XI. */
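/* A register-to-register move is 8, 12 or 16 bytes (two, three or four
   instructions) depending on the mode; the LABEL_REF / PLUS memory forms
   cost 4 bytes for each D register moved (one vldr or vstr each, matching
   output_move_neon), and every other addressing mode is a single 4-byte
   instruction.  */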
18571 int
18572 arm_attr_length_move_neon (rtx_insn *insn)
18573 {
18574 rtx reg, mem, addr;
18575 int load;
18576 machine_mode mode;
18577
18578 extract_insn_cached (insn);
18579
18580 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18581 {
18582 mode = GET_MODE (recog_data.operand[0]);
18583 switch (mode)
18584 {
18585 case EImode:
18586 case OImode:
18587 return 8;
18588 case CImode:
18589 return 12;
18590 case XImode:
18591 return 16;
18592 default:
18593 gcc_unreachable ();
18594 }
18595 }
18596
18597 load = REG_P (recog_data.operand[0]);
18598 reg = recog_data.operand[!load];
18599 mem = recog_data.operand[load];
18600
18601 gcc_assert (MEM_P (mem));
18602
18603 mode = GET_MODE (reg);
18604 addr = XEXP (mem, 0);
18605
18606 /* Strip off const from addresses like (const (plus (...))). */
18607 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18608 addr = XEXP (addr, 0);
18609
18610 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18611 {
18612 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18613 return insns * 4;
18614 }
18615 else
18616 return 4;
18617 }
18618
18619 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18620 return zero. */
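/* For example, a store to [r3] or to [r3, #8] returns 1, whereas a store
   with a register offset such as [r3, r4], any auto-modify address, or a
   load (operand 0 a register) returns 0.  */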
18621
18622 int
18623 arm_address_offset_is_imm (rtx_insn *insn)
18624 {
18625 rtx mem, addr;
18626
18627 extract_insn_cached (insn);
18628
18629 if (REG_P (recog_data.operand[0]))
18630 return 0;
18631
18632 mem = recog_data.operand[0];
18633
18634 gcc_assert (MEM_P (mem));
18635
18636 addr = XEXP (mem, 0);
18637
18638 if (REG_P (addr)
18639 || (GET_CODE (addr) == PLUS
18640 && REG_P (XEXP (addr, 0))
18641 && CONST_INT_P (XEXP (addr, 1))))
18642 return 1;
18643 else
18644 return 0;
18645 }
18646
18647 /* Output an ADD r, s, #n where n may be too big for one instruction.
18648 If adding zero to one register, output nothing. */
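/* As a worked example (illustrative registers): #0x101 cannot be encoded
   as a single ARM immediate, so adding it is split by
   output_multi_immediate into

	add	r0, r1, #1
	add	r0, r0, #256

   and a negative N is handled the same way using "sub".  */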
18649 const char *
18650 output_add_immediate (rtx *operands)
18651 {
18652 HOST_WIDE_INT n = INTVAL (operands[2]);
18653
18654 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18655 {
18656 if (n < 0)
18657 output_multi_immediate (operands,
18658 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18659 -n);
18660 else
18661 output_multi_immediate (operands,
18662 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18663 n);
18664 }
18665
18666 return "";
18667 }
18668
18669 /* Output a multiple immediate operation.
18670 OPERANDS is the vector of operands referred to in the output patterns.
18671 INSTR1 is the output pattern to use for the first constant.
18672 INSTR2 is the output pattern to use for subsequent constants.
18673 IMMED_OP is the index of the constant slot in OPERANDS.
18674 N is the constant value. */
18675 static const char *
18676 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18677 int immed_op, HOST_WIDE_INT n)
18678 {
18679 #if HOST_BITS_PER_WIDE_INT > 32
18680 n &= 0xffffffff;
18681 #endif
18682
18683 if (n == 0)
18684 {
18685 /* Quick and easy output. */
18686 operands[immed_op] = const0_rtx;
18687 output_asm_insn (instr1, operands);
18688 }
18689 else
18690 {
18691 int i;
18692 const char * instr = instr1;
18693
18694 /* Note that n is never zero here (which would give no output). */
18695 for (i = 0; i < 32; i += 2)
18696 {
18697 if (n & (3 << i))
18698 {
18699 operands[immed_op] = GEN_INT (n & (255 << i));
18700 output_asm_insn (instr, operands);
18701 instr = instr2;
18702 i += 6;
18703 }
18704 }
18705 }
18706
18707 return "";
18708 }
18709
18710 /* Return the name of a shifter operation. */
18711 static const char *
18712 arm_shift_nmem (enum rtx_code code)
18713 {
18714 switch (code)
18715 {
18716 case ASHIFT:
18717 return ARM_LSL_NAME;
18718
18719 case ASHIFTRT:
18720 return "asr";
18721
18722 case LSHIFTRT:
18723 return "lsr";
18724
18725 case ROTATERT:
18726 return "ror";
18727
18728 default:
18729 abort ();
18730 }
18731 }
18732
18733 /* Return the appropriate ARM instruction for the operation code.
18734 The returned result should not be overwritten. OP is the rtx of the
18735 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18736 was shifted. */
18737 const char *
18738 arithmetic_instr (rtx op, int shift_first_arg)
18739 {
18740 switch (GET_CODE (op))
18741 {
18742 case PLUS:
18743 return "add";
18744
18745 case MINUS:
18746 return shift_first_arg ? "rsb" : "sub";
18747
18748 case IOR:
18749 return "orr";
18750
18751 case XOR:
18752 return "eor";
18753
18754 case AND:
18755 return "and";
18756
18757 case ASHIFT:
18758 case ASHIFTRT:
18759 case LSHIFTRT:
18760 case ROTATERT:
18761 return arm_shift_nmem (GET_CODE (op));
18762
18763 default:
18764 gcc_unreachable ();
18765 }
18766 }
18767
18768 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18769 for the operation code. The returned result should not be overwritten.
18770 OP is the rtx code of the shift.
18771 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18772 contains the constant shift amount. */
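/* For example, (ashiftrt x (const_int 3)) yields "asr" with *AMOUNTP == 3,
   (ashift x (reg)) yields "lsl" with *AMOUNTP == -1, and (mult x
   (const_int 8)) is treated as "lsl" with *AMOUNTP == 3.  A (rotate x
   (const_int N)) is rewritten as a "ror" by 32 - N.  */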
18773 static const char *
18774 shift_op (rtx op, HOST_WIDE_INT *amountp)
18775 {
18776 const char * mnem;
18777 enum rtx_code code = GET_CODE (op);
18778
18779 switch (code)
18780 {
18781 case ROTATE:
18782 if (!CONST_INT_P (XEXP (op, 1)))
18783 {
18784 output_operand_lossage ("invalid shift operand");
18785 return NULL;
18786 }
18787
18788 code = ROTATERT;
18789 *amountp = 32 - INTVAL (XEXP (op, 1));
18790 mnem = "ror";
18791 break;
18792
18793 case ASHIFT:
18794 case ASHIFTRT:
18795 case LSHIFTRT:
18796 case ROTATERT:
18797 mnem = arm_shift_nmem (code);
18798 if (CONST_INT_P (XEXP (op, 1)))
18799 {
18800 *amountp = INTVAL (XEXP (op, 1));
18801 }
18802 else if (REG_P (XEXP (op, 1)))
18803 {
18804 *amountp = -1;
18805 return mnem;
18806 }
18807 else
18808 {
18809 output_operand_lossage ("invalid shift operand");
18810 return NULL;
18811 }
18812 break;
18813
18814 case MULT:
18815 /* We never have to worry about the amount being other than a
18816 power of 2, since this case can never be reloaded from a reg. */
18817 if (!CONST_INT_P (XEXP (op, 1)))
18818 {
18819 output_operand_lossage ("invalid shift operand");
18820 return NULL;
18821 }
18822
18823 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18824
18825 /* Amount must be a power of two. */
18826 if (*amountp & (*amountp - 1))
18827 {
18828 output_operand_lossage ("invalid shift operand");
18829 return NULL;
18830 }
18831
18832 *amountp = exact_log2 (*amountp);
18833 gcc_assert (IN_RANGE (*amountp, 0, 31));
18834 return ARM_LSL_NAME;
18835
18836 default:
18837 output_operand_lossage ("invalid shift operand");
18838 return NULL;
18839 }
18840
18841 /* This is not 100% correct, but follows from the desire to merge
18842 multiplication by a power of 2 with the recognizer for a
18843 shift. >=32 is not a valid shift for "lsl", so we must try and
18844 output a shift that produces the correct arithmetical result.
18845 Using lsr #32 is identical except for the fact that the carry bit
18846 is not set correctly if we set the flags; but we never use the
18847 carry bit from such an operation, so we can ignore that. */
18848 if (code == ROTATERT)
18849 /* Rotate is just modulo 32. */
18850 *amountp &= 31;
18851 else if (*amountp != (*amountp & 31))
18852 {
18853 if (code == ASHIFT)
18854 mnem = "lsr";
18855 *amountp = 32;
18856 }
18857
18858 /* Shifts of 0 are no-ops. */
18859 if (*amountp == 0)
18860 return NULL;
18861
18862 return mnem;
18863 }
18864
18865 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18866 because /bin/as is horribly restrictive. The judgement about
18867 whether or not each character is 'printable' (and can be output as
18868 is) or not (and must be printed with an octal escape) must be made
18869 with reference to the *host* character set -- the situation is
18870 similar to that discussed in the comments above pp_c_char in
18871 c-pretty-print.c. */
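/* For example, the three bytes 'H', 'i', 0 are emitted as

	.ascii	"Hi\000"

   and strings longer than MAX_ASCII_LEN characters are split across
   several .ascii directives.  */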
18872
18873 #define MAX_ASCII_LEN 51
18874
18875 void
18876 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18877 {
18878 int i;
18879 int len_so_far = 0;
18880
18881 fputs ("\t.ascii\t\"", stream);
18882
18883 for (i = 0; i < len; i++)
18884 {
18885 int c = p[i];
18886
18887 if (len_so_far >= MAX_ASCII_LEN)
18888 {
18889 fputs ("\"\n\t.ascii\t\"", stream);
18890 len_so_far = 0;
18891 }
18892
18893 if (ISPRINT (c))
18894 {
18895 if (c == '\\' || c == '\"')
18896 {
18897 putc ('\\', stream);
18898 len_so_far++;
18899 }
18900 putc (c, stream);
18901 len_so_far++;
18902 }
18903 else
18904 {
18905 fprintf (stream, "\\%03o", c);
18906 len_so_far += 4;
18907 }
18908 }
18909
18910 fputs ("\"\n", stream);
18911 }
18912 \f
18913 /* Whether a register is callee saved or not. This is necessary because high
18914 registers are marked as caller saved when optimizing for size on Thumb-1
18915 targets, in order to avoid using them, despite being callee saved. */
18916 #define callee_saved_reg_p(reg) \
18917 (!call_used_regs[reg] \
18918 || (TARGET_THUMB1 && optimize_size \
18919 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18920
18921 /* Compute the register save mask for registers 0 through 12
18922 inclusive. This code is used by arm_compute_save_reg_mask. */
18923
18924 static unsigned long
18925 arm_compute_save_reg0_reg12_mask (void)
18926 {
18927 unsigned long func_type = arm_current_func_type ();
18928 unsigned long save_reg_mask = 0;
18929 unsigned int reg;
18930
18931 if (IS_INTERRUPT (func_type))
18932 {
18933 unsigned int max_reg;
18934 /* Interrupt functions must not corrupt any registers,
18935 even call clobbered ones. If this is a leaf function
18936 we can just examine the registers used by the RTL, but
18937 otherwise we have to assume that whatever function is
18938 called might clobber anything, and so we have to save
18939 all the call-clobbered registers as well. */
18940 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18941 /* FIQ handlers have registers r8 - r12 banked, so
18942 we only need to check r0 - r7. Normal ISRs only
18943 bank r14 and r15, so we must check up to r12.
18944 r13 is the stack pointer which is always preserved,
18945 so we do not need to consider it here. */
18946 max_reg = 7;
18947 else
18948 max_reg = 12;
18949
18950 for (reg = 0; reg <= max_reg; reg++)
18951 if (df_regs_ever_live_p (reg)
18952 || (! crtl->is_leaf && call_used_regs[reg]))
18953 save_reg_mask |= (1 << reg);
18954
18955 /* Also save the pic base register if necessary. */
18956 if (flag_pic
18957 && !TARGET_SINGLE_PIC_BASE
18958 && arm_pic_register != INVALID_REGNUM
18959 && crtl->uses_pic_offset_table)
18960 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18961 }
18962 else if (IS_VOLATILE (func_type))
18963 {
18964 /* For noreturn functions we historically omitted register saves
18965 altogether. However this really messes up debugging. As a
18966 compromise save just the frame pointers. Combined with the link
18967 register saved elsewhere this should be sufficient to get
18968 a backtrace. */
18969 if (frame_pointer_needed)
18970 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18971 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18972 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18973 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18974 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18975 }
18976 else
18977 {
18978 /* In the normal case we only need to save those registers
18979 which are call saved and which are used by this function. */
18980 for (reg = 0; reg <= 11; reg++)
18981 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18982 save_reg_mask |= (1 << reg);
18983
18984 /* Handle the frame pointer as a special case. */
18985 if (frame_pointer_needed)
18986 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18987
18988 /* If we aren't loading the PIC register,
18989 don't stack it even though it may be live. */
18990 if (flag_pic
18991 && !TARGET_SINGLE_PIC_BASE
18992 && arm_pic_register != INVALID_REGNUM
18993 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18994 || crtl->uses_pic_offset_table))
18995 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18996
18997 /* The prologue will copy SP into R0, so save it. */
18998 if (IS_STACKALIGN (func_type))
18999 save_reg_mask |= 1;
19000 }
19001
19002 /* Save registers so the exception handler can modify them. */
19003 if (crtl->calls_eh_return)
19004 {
19005 unsigned int i;
19006
19007 for (i = 0; ; i++)
19008 {
19009 reg = EH_RETURN_DATA_REGNO (i);
19010 if (reg == INVALID_REGNUM)
19011 break;
19012 save_reg_mask |= 1 << reg;
19013 }
19014 }
19015
19016 return save_reg_mask;
19017 }
19018
19019 /* Return true if r3 is live at the start of the function. */
19020
19021 static bool
19022 arm_r3_live_at_start_p (void)
19023 {
19024 /* Just look at cfg info, which is still close enough to correct at this
19025 point. This gives false positives for broken functions that might use
19026 uninitialized data that happens to be allocated in r3, but who cares? */
19027 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19028 }
19029
19030 /* Compute the number of bytes used to store the static chain register on the
19031 stack, above the stack frame. We need to know this accurately to get the
19032 alignment of the rest of the stack frame correct. */
19033
19034 static int
19035 arm_compute_static_chain_stack_bytes (void)
19036 {
19037 /* See the defining assertion in arm_expand_prologue. */
19038 if (IS_NESTED (arm_current_func_type ())
19039 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19040 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19041 && !df_regs_ever_live_p (LR_REGNUM)))
19042 && arm_r3_live_at_start_p ()
19043 && crtl->args.pretend_args_size == 0)
19044 return 4;
19045
19046 return 0;
19047 }
19048
19049 /* Compute a bit mask of which registers need to be
19050 saved on the stack for the current function.
19051 This is used by arm_get_frame_offsets, which may add extra registers. */
19052
19053 static unsigned long
19054 arm_compute_save_reg_mask (void)
19055 {
19056 unsigned int save_reg_mask = 0;
19057 unsigned long func_type = arm_current_func_type ();
19058 unsigned int reg;
19059
19060 if (IS_NAKED (func_type))
19061 /* This should never really happen. */
19062 return 0;
19063
19064 /* If we are creating a stack frame, then we must save the frame pointer,
19065 IP (which will hold the old stack pointer), LR and the PC. */
19066 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19067 save_reg_mask |=
19068 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19069 | (1 << IP_REGNUM)
19070 | (1 << LR_REGNUM)
19071 | (1 << PC_REGNUM);
19072
19073 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19074
19075 /* Decide if we need to save the link register.
19076 Interrupt routines have their own banked link register,
19077 so they never need to save it.
19078 Otherwise if we do not use the link register we do not need to save
19079 it. If we are pushing other registers onto the stack however, we
19080 can save an instruction in the epilogue by pushing the link register
19081 now and then popping it back into the PC. This incurs extra memory
19082 accesses though, so we only do it when optimizing for size, and only
19083 if we know that we will not need a fancy return sequence. */
19084 if (df_regs_ever_live_p (LR_REGNUM)
19085 || (save_reg_mask
19086 && optimize_size
19087 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19088 && !crtl->tail_call_emit
19089 && !crtl->calls_eh_return))
19090 save_reg_mask |= 1 << LR_REGNUM;
19091
19092 if (cfun->machine->lr_save_eliminated)
19093 save_reg_mask &= ~ (1 << LR_REGNUM);
19094
19095 if (TARGET_REALLY_IWMMXT
19096 && ((bit_count (save_reg_mask)
19097 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19098 arm_compute_static_chain_stack_bytes ()
19099 ) % 2) != 0)
19100 {
19101 /* The total number of registers that are going to be pushed
19102 onto the stack is odd. We need to ensure that the stack
19103 is 64-bit aligned before we start to save iWMMXt registers,
19104 and also before we start to create locals. (A local variable
19105 might be a double or long long which we will load/store using
19106 an iWMMXt instruction). Therefore we need to push another
19107 ARM register, so that the stack will be 64-bit aligned. We
19108 try to avoid using the arg registers (r0 - r3) as they might be
19109 used to pass values in a tail call. */
19110 for (reg = 4; reg <= 12; reg++)
19111 if ((save_reg_mask & (1 << reg)) == 0)
19112 break;
19113
19114 if (reg <= 12)
19115 save_reg_mask |= (1 << reg);
19116 else
19117 {
19118 cfun->machine->sibcall_blocked = 1;
19119 save_reg_mask |= (1 << 3);
19120 }
19121 }
19122
19123 /* We may need to push an additional register for use initializing the
19124 PIC base register. */
19125 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19126 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19127 {
19128 reg = thumb_find_work_register (1 << 4);
19129 if (!call_used_regs[reg])
19130 save_reg_mask |= (1 << reg);
19131 }
19132
19133 return save_reg_mask;
19134 }
19135
19136 /* Compute a bit mask of which registers need to be
19137 saved on the stack for the current function. */
19138 static unsigned long
19139 thumb1_compute_save_reg_mask (void)
19140 {
19141 unsigned long mask;
19142 unsigned reg;
19143
19144 mask = 0;
19145 for (reg = 0; reg < 12; reg ++)
19146 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19147 mask |= 1 << reg;
19148
19149 /* Handle the frame pointer as a special case. */
19150 if (frame_pointer_needed)
19151 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19152
19153 if (flag_pic
19154 && !TARGET_SINGLE_PIC_BASE
19155 && arm_pic_register != INVALID_REGNUM
19156 && crtl->uses_pic_offset_table)
19157 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19158
19159 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19160 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19161 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19162
19163 /* LR will also be pushed if any lo regs are pushed. */
19164 if (mask & 0xff || thumb_force_lr_save ())
19165 mask |= (1 << LR_REGNUM);
19166
19167 /* Make sure we have a low work register if we need one.
19168 We will need one if we are going to push a high register,
19169 but we are not currently intending to push a low register. */
19170 if ((mask & 0xff) == 0
19171 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19172 {
19173 /* Use thumb_find_work_register to choose which register
19174 we will use. If the register is live then we will
19175 have to push it. Use LAST_LO_REGNUM as our fallback
19176 choice for the register to select. */
19177 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19178 /* Make sure the register returned by thumb_find_work_register is
19179 not part of the return value. */
19180 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19181 reg = LAST_LO_REGNUM;
19182
19183 if (callee_saved_reg_p (reg))
19184 mask |= 1 << reg;
19185 }
19186
19187 /* The 504 below is 8 bytes less than 512 because there are two possible
19188 alignment words. We can't tell here if they will be present or not so we
19189 have to play it safe and assume that they are. */
19190 if ((CALLER_INTERWORKING_SLOT_SIZE +
19191 ROUND_UP_WORD (get_frame_size ()) +
19192 crtl->outgoing_args_size) >= 504)
19193 {
19194 /* This is the same as the code in thumb1_expand_prologue() which
19195 determines which register to use for stack decrement. */
19196 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19197 if (mask & (1 << reg))
19198 break;
19199
19200 if (reg > LAST_LO_REGNUM)
19201 {
19202 /* Make sure we have a register available for stack decrement. */
19203 mask |= 1 << LAST_LO_REGNUM;
19204 }
19205 }
19206
19207 return mask;
19208 }
19209
19210
19211 /* Return the number of bytes required to save VFP registers. */
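/* The loop below walks the VFP bank one D register (a pair of S registers)
   at a time; each live, call-saved D register in a contiguous run
   contributes 8 bytes.  As a worked example, if only d8 and d9 need saving
   the result is 16, or 24 on pre-ARMv6 cores where the ARM10 VFPr1
   workaround pads a run of exactly two registers to three.  */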
19212 static int
19213 arm_get_vfp_saved_size (void)
19214 {
19215 unsigned int regno;
19216 int count;
19217 int saved;
19218
19219 saved = 0;
19220 /* Space for saved VFP registers. */
19221 if (TARGET_HARD_FLOAT)
19222 {
19223 count = 0;
19224 for (regno = FIRST_VFP_REGNUM;
19225 regno < LAST_VFP_REGNUM;
19226 regno += 2)
19227 {
19228 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19229 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19230 {
19231 if (count > 0)
19232 {
19233 /* Workaround ARM10 VFPr1 bug. */
19234 if (count == 2 && !arm_arch6)
19235 count++;
19236 saved += count * 8;
19237 }
19238 count = 0;
19239 }
19240 else
19241 count++;
19242 }
19243 if (count > 0)
19244 {
19245 if (count == 2 && !arm_arch6)
19246 count++;
19247 saved += count * 8;
19248 }
19249 }
19250 return saved;
19251 }
19252
19253
19254 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19255 everything bar the final return instruction. If simple_return is true,
19256 then do not output the epilogue, because it has already been emitted in RTL. */
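/* For a typical ARM-state function that saved r4, r5 and lr this prints
   something like

	pop	{r4, r5, pc}

   (registers are illustrative).  Interrupt handlers instead use an LDM
   with the "^" suffix so that the CPSR is restored as well, and a
   register-free return is simply "bx lr", or "mov pc, lr" on cores
   without BX.  */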
19257 const char *
19258 output_return_instruction (rtx operand, bool really_return, bool reverse,
19259 bool simple_return)
19260 {
19261 char conditional[10];
19262 char instr[100];
19263 unsigned reg;
19264 unsigned long live_regs_mask;
19265 unsigned long func_type;
19266 arm_stack_offsets *offsets;
19267
19268 func_type = arm_current_func_type ();
19269
19270 if (IS_NAKED (func_type))
19271 return "";
19272
19273 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19274 {
19275 /* If this function was declared non-returning, and we have
19276 found a tail call, then we have to trust that the called
19277 function won't return. */
19278 if (really_return)
19279 {
19280 rtx ops[2];
19281
19282 /* Otherwise, trap an attempted return by aborting. */
19283 ops[0] = operand;
19284 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19285 : "abort");
19286 assemble_external_libcall (ops[1]);
19287 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19288 }
19289
19290 return "";
19291 }
19292
19293 gcc_assert (!cfun->calls_alloca || really_return);
19294
19295 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19296
19297 cfun->machine->return_used_this_function = 1;
19298
19299 offsets = arm_get_frame_offsets ();
19300 live_regs_mask = offsets->saved_regs_mask;
19301
19302 if (!simple_return && live_regs_mask)
19303 {
19304 const char * return_reg;
19305
19306 /* If we do not have any special requirements for function exit
19307 (e.g. interworking) then we can load the return address
19308 directly into the PC. Otherwise we must load it into LR. */
19309 if (really_return
19310 && !IS_CMSE_ENTRY (func_type)
19311 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19312 return_reg = reg_names[PC_REGNUM];
19313 else
19314 return_reg = reg_names[LR_REGNUM];
19315
19316 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19317 {
19318 /* There are three possible reasons for the IP register
19319 being saved: 1) a stack frame was created, in which case
19320 IP contains the old stack pointer, or 2) an ISR routine
19321 corrupted it, or 3) it was saved to align the stack on
19322 iWMMXt. In case 1, restore IP into SP, otherwise just
19323 restore IP. */
19324 if (frame_pointer_needed)
19325 {
19326 live_regs_mask &= ~ (1 << IP_REGNUM);
19327 live_regs_mask |= (1 << SP_REGNUM);
19328 }
19329 else
19330 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19331 }
19332
19333 /* On some ARM architectures it is faster to use LDR rather than
19334 LDM to load a single register. On other architectures, the
19335 cost is the same. In 26 bit mode, or for exception handlers,
19336 we have to use LDM to load the PC so that the CPSR is also
19337 restored. */
19338 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19339 if (live_regs_mask == (1U << reg))
19340 break;
19341
19342 if (reg <= LAST_ARM_REGNUM
19343 && (reg != LR_REGNUM
19344 || ! really_return
19345 || ! IS_INTERRUPT (func_type)))
19346 {
19347 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19348 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19349 }
19350 else
19351 {
19352 char *p;
19353 int first = 1;
19354
19355 /* Generate the load multiple instruction to restore the
19356 registers. Note we can get here, even if
19357 frame_pointer_needed is true, but only if sp already
19358 points to the base of the saved core registers. */
19359 if (live_regs_mask & (1 << SP_REGNUM))
19360 {
19361 unsigned HOST_WIDE_INT stack_adjust;
19362
19363 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19364 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19365
19366 if (stack_adjust && arm_arch5 && TARGET_ARM)
19367 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19368 else
19369 {
19370 /* If we can't use ldmib (SA110 bug),
19371 then try to pop r3 instead. */
19372 if (stack_adjust)
19373 live_regs_mask |= 1 << 3;
19374
19375 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19376 }
19377 }
19378 /* For interrupt returns we have to use an LDM rather than
19379 a POP so that we can use the exception return variant. */
19380 else if (IS_INTERRUPT (func_type))
19381 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19382 else
19383 sprintf (instr, "pop%s\t{", conditional);
19384
19385 p = instr + strlen (instr);
19386
19387 for (reg = 0; reg <= SP_REGNUM; reg++)
19388 if (live_regs_mask & (1 << reg))
19389 {
19390 int l = strlen (reg_names[reg]);
19391
19392 if (first)
19393 first = 0;
19394 else
19395 {
19396 memcpy (p, ", ", 2);
19397 p += 2;
19398 }
19399
19400 memcpy (p, "%|", 2);
19401 memcpy (p + 2, reg_names[reg], l);
19402 p += l + 2;
19403 }
19404
19405 if (live_regs_mask & (1 << LR_REGNUM))
19406 {
19407 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19408 /* If returning from an interrupt, restore the CPSR. */
19409 if (IS_INTERRUPT (func_type))
19410 strcat (p, "^");
19411 }
19412 else
19413 strcpy (p, "}");
19414 }
19415
19416 output_asm_insn (instr, & operand);
19417
19418 /* See if we need to generate an extra instruction to
19419 perform the actual function return. */
19420 if (really_return
19421 && func_type != ARM_FT_INTERWORKED
19422 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19423 {
19424 /* The return has already been handled
19425 by loading the LR into the PC. */
19426 return "";
19427 }
19428 }
19429
19430 if (really_return)
19431 {
19432 switch ((int) ARM_FUNC_TYPE (func_type))
19433 {
19434 case ARM_FT_ISR:
19435 case ARM_FT_FIQ:
19436 /* ??? This is wrong for unified assembly syntax. */
19437 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19438 break;
19439
19440 case ARM_FT_INTERWORKED:
19441 gcc_assert (arm_arch5 || arm_arch4t);
19442 sprintf (instr, "bx%s\t%%|lr", conditional);
19443 break;
19444
19445 case ARM_FT_EXCEPTION:
19446 /* ??? This is wrong for unified assembly syntax. */
19447 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19448 break;
19449
19450 default:
19451 if (IS_CMSE_ENTRY (func_type))
19452 {
19453 /* Check if we have to clear the 'GE bits' which is only used if
19454 parallel add and subtraction instructions are available. */
19455 if (TARGET_INT_SIMD)
19456 snprintf (instr, sizeof (instr),
19457 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19458 else
19459 snprintf (instr, sizeof (instr),
19460 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19461
19462 output_asm_insn (instr, & operand);
19463 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19464 {
19465 /* Clear the cumulative exception-status bits (0-4,7) and the
19466 condition code bits (28-31) of the FPSCR. We need to
19467 remember to clear the first scratch register used (IP) and
19468 save and restore the second (r4). */
19469 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19470 output_asm_insn (instr, & operand);
19471 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19472 output_asm_insn (instr, & operand);
19473 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19474 output_asm_insn (instr, & operand);
19475 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19476 output_asm_insn (instr, & operand);
19477 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19478 output_asm_insn (instr, & operand);
19479 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19480 output_asm_insn (instr, & operand);
19481 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19482 output_asm_insn (instr, & operand);
19483 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19484 output_asm_insn (instr, & operand);
19485 }
19486 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19487 }
19488 /* Use bx if it's available. */
19489 else if (arm_arch5 || arm_arch4t)
19490 sprintf (instr, "bx%s\t%%|lr", conditional);
19491 else
19492 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19493 break;
19494 }
19495
19496 output_asm_insn (instr, & operand);
19497 }
19498
19499 return "";
19500 }
19501
19502 /* Output in FILE asm statements needed to declare the NAME of the function
19503 defined by its DECL node. */
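/* For a function "foo" carrying the cmse_nonsecure_entry attribute and
   compiled with -mcmse, this emits (roughly) an extra

	.globl	__acle_se_foo
	.type	__acle_se_foo, %function

   together with both the "foo" and "__acle_se_foo" labels at the start of
   the function, so the linker can build the secure gateway veneer.  */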
19504
19505 void
19506 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19507 {
19508 size_t cmse_name_len;
19509 char *cmse_name = 0;
19510 char cmse_prefix[] = "__acle_se_";
19511
19512 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19513 extra function label for each function with the 'cmse_nonsecure_entry'
19514 attribute. This extra function label should be prepended with
19515 '__acle_se_', telling the linker that it needs to create secure gateway
19516 veneers for this function. */
19517 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19518 DECL_ATTRIBUTES (decl)))
19519 {
19520 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19521 cmse_name = XALLOCAVEC (char, cmse_name_len);
19522 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19523 targetm.asm_out.globalize_label (file, cmse_name);
19524
19525 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19526 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19527 }
19528
19529 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19530 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19531 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19532 ASM_OUTPUT_LABEL (file, name);
19533
19534 if (cmse_name)
19535 ASM_OUTPUT_LABEL (file, cmse_name);
19536
19537 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19538 }
19539
19540 /* Write the function name into the code section, directly preceding
19541 the function prologue.
19542
19543 Code will be output similar to this:
19544 t0
19545 .ascii "arm_poke_function_name", 0
19546 .align
19547 t1
19548 .word 0xff000000 + (t1 - t0)
19549 arm_poke_function_name
19550 mov ip, sp
19551 stmfd sp!, {fp, ip, lr, pc}
19552 sub fp, ip, #4
19553
19554 When performing a stack backtrace, code can inspect the value
19555 of 'pc' stored at 'fp' + 0. If the trace function then looks
19556 at location pc - 12 and the top 8 bits are set, then we know
19557 that there is a function name embedded immediately preceding this
19558 location, whose length is ((pc[-3]) & ~0xff000000).
19559
19560 We assume that pc is declared as a pointer to an unsigned long.
19561
19562 It is of no benefit to output the function name if we are assembling
19563 a leaf function. These function types will not contain a stack
19564 backtrace structure, therefore it is not possible to determine the
19565 function name. */
19566 void
19567 arm_poke_function_name (FILE *stream, const char *name)
19568 {
19569 unsigned long alignlength;
19570 unsigned long length;
19571 rtx x;
19572
19573 length = strlen (name) + 1;
19574 alignlength = ROUND_UP_WORD (length);
19575
19576 ASM_OUTPUT_ASCII (stream, name, length);
19577 ASM_OUTPUT_ALIGN (stream, 2);
19578 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19579 assemble_aligned_integer (UNITS_PER_WORD, x);
19580 }
19581
19582 /* Place some comments into the assembler stream
19583 describing the current function. */
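/* The result is a short block of assembler comments ahead of the prologue,
   for example

	@ Function supports interworking.
	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0

   where the values shown are purely illustrative.  */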
19584 static void
19585 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19586 {
19587 unsigned long func_type;
19588
19589 /* Sanity check. */
19590 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19591
19592 func_type = arm_current_func_type ();
19593
19594 switch ((int) ARM_FUNC_TYPE (func_type))
19595 {
19596 default:
19597 case ARM_FT_NORMAL:
19598 break;
19599 case ARM_FT_INTERWORKED:
19600 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19601 break;
19602 case ARM_FT_ISR:
19603 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19604 break;
19605 case ARM_FT_FIQ:
19606 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19607 break;
19608 case ARM_FT_EXCEPTION:
19609 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19610 break;
19611 }
19612
19613 if (IS_NAKED (func_type))
19614 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19615
19616 if (IS_VOLATILE (func_type))
19617 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19618
19619 if (IS_NESTED (func_type))
19620 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19621 if (IS_STACKALIGN (func_type))
19622 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19623 if (IS_CMSE_ENTRY (func_type))
19624 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19625
19626 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19627 crtl->args.size,
19628 crtl->args.pretend_args_size, frame_size);
19629
19630 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19631 frame_pointer_needed,
19632 cfun->machine->uses_anonymous_args);
19633
19634 if (cfun->machine->lr_save_eliminated)
19635 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19636
19637 if (crtl->calls_eh_return)
19638 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19639
19640 }
19641
19642 static void
19643 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19644 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19645 {
19646 arm_stack_offsets *offsets;
19647
19648 if (TARGET_THUMB1)
19649 {
19650 int regno;
19651
19652 /* Emit any call-via-reg trampolines that are needed for v4t support
19653 of call_reg and call_value_reg type insns. */
19654 for (regno = 0; regno < LR_REGNUM; regno++)
19655 {
19656 rtx label = cfun->machine->call_via[regno];
19657
19658 if (label != NULL)
19659 {
19660 switch_to_section (function_section (current_function_decl));
19661 targetm.asm_out.internal_label (asm_out_file, "L",
19662 CODE_LABEL_NUMBER (label));
19663 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19664 }
19665 }
19666
19667 /* ??? Probably not safe to set this here, since it assumes that a
19668 function will be emitted as assembly immediately after we generate
19669 RTL for it. This does not happen for inline functions. */
19670 cfun->machine->return_used_this_function = 0;
19671 }
19672 else /* TARGET_32BIT */
19673 {
19674 /* We need to take into account any stack-frame rounding. */
19675 offsets = arm_get_frame_offsets ();
19676
19677 gcc_assert (!use_return_insn (FALSE, NULL)
19678 || (cfun->machine->return_used_this_function != 0)
19679 || offsets->saved_regs == offsets->outgoing_args
19680 || frame_pointer_needed);
19681 }
19682 }
19683
19684 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19685 STR and STRD. If an even number of registers are being pushed, one
19686 or more STRD patterns are created for each register pair. If an
19687 odd number of registers are pushed, emit an initial STR followed by
19688 as many STRD instructions as are needed. This works best when the
19689 stack is initially 64-bit aligned (the normal case), since it
19690 ensures that each STRD is also 64-bit aligned. */
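/* For instance, pushing the odd-sized set {r4, r5, r6, r7, lr} first
   stores r4 with a write-back address that allocates the whole 20-byte
   area, then stores the pairs (r5, r6) and (r7, lr) at the now
   doubleword-aligned slots above it.  (A sketch only: the function builds
   RTL rather than emitting text.)  */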
19691 static void
19692 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19693 {
19694 int num_regs = 0;
19695 int i;
19696 int regno;
19697 rtx par = NULL_RTX;
19698 rtx dwarf = NULL_RTX;
19699 rtx tmp;
19700 bool first = true;
19701
19702 num_regs = bit_count (saved_regs_mask);
19703
19704 /* Must be at least one register to save, and can't save SP or PC. */
19705 gcc_assert (num_regs > 0 && num_regs <= 14);
19706 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19707 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19708
19709 /* Create sequence for DWARF info. All the frame-related data for
19710 debugging is held in this wrapper. */
19711 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19712
19713 /* Describe the stack adjustment. */
19714 tmp = gen_rtx_SET (stack_pointer_rtx,
19715 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19716 RTX_FRAME_RELATED_P (tmp) = 1;
19717 XVECEXP (dwarf, 0, 0) = tmp;
19718
19719 /* Find the first register. */
19720 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19721 ;
19722
19723 i = 0;
19724
19725 /* If there's an odd number of registers to push, start off by
19726 pushing a single register. This ensures that subsequent strd
19727 operations are dword aligned (assuming that SP was originally
19728 64-bit aligned). */
19729 if ((num_regs & 1) != 0)
19730 {
19731 rtx reg, mem, insn;
19732
19733 reg = gen_rtx_REG (SImode, regno);
19734 if (num_regs == 1)
19735 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19736 stack_pointer_rtx));
19737 else
19738 mem = gen_frame_mem (Pmode,
19739 gen_rtx_PRE_MODIFY
19740 (Pmode, stack_pointer_rtx,
19741 plus_constant (Pmode, stack_pointer_rtx,
19742 -4 * num_regs)));
19743
19744 tmp = gen_rtx_SET (mem, reg);
19745 RTX_FRAME_RELATED_P (tmp) = 1;
19746 insn = emit_insn (tmp);
19747 RTX_FRAME_RELATED_P (insn) = 1;
19748 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19749 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19750 RTX_FRAME_RELATED_P (tmp) = 1;
19751 i++;
19752 regno++;
19753 XVECEXP (dwarf, 0, i) = tmp;
19754 first = false;
19755 }
19756
19757 while (i < num_regs)
19758 if (saved_regs_mask & (1 << regno))
19759 {
19760 rtx reg1, reg2, mem1, mem2;
19761 rtx tmp0, tmp1, tmp2;
19762 int regno2;
19763
19764 /* Find the register to pair with this one. */
19765 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19766 regno2++)
19767 ;
19768
19769 reg1 = gen_rtx_REG (SImode, regno);
19770 reg2 = gen_rtx_REG (SImode, regno2);
19771
19772 if (first)
19773 {
19774 rtx insn;
19775
19776 first = false;
19777 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19778 stack_pointer_rtx,
19779 -4 * num_regs));
19780 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19781 stack_pointer_rtx,
19782 -4 * (num_regs - 1)));
19783 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19784 plus_constant (Pmode, stack_pointer_rtx,
19785 -4 * (num_regs)));
19786 tmp1 = gen_rtx_SET (mem1, reg1);
19787 tmp2 = gen_rtx_SET (mem2, reg2);
19788 RTX_FRAME_RELATED_P (tmp0) = 1;
19789 RTX_FRAME_RELATED_P (tmp1) = 1;
19790 RTX_FRAME_RELATED_P (tmp2) = 1;
19791 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19792 XVECEXP (par, 0, 0) = tmp0;
19793 XVECEXP (par, 0, 1) = tmp1;
19794 XVECEXP (par, 0, 2) = tmp2;
19795 insn = emit_insn (par);
19796 RTX_FRAME_RELATED_P (insn) = 1;
19797 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19798 }
19799 else
19800 {
19801 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19802 stack_pointer_rtx,
19803 4 * i));
19804 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19805 stack_pointer_rtx,
19806 4 * (i + 1)));
19807 tmp1 = gen_rtx_SET (mem1, reg1);
19808 tmp2 = gen_rtx_SET (mem2, reg2);
19809 RTX_FRAME_RELATED_P (tmp1) = 1;
19810 RTX_FRAME_RELATED_P (tmp2) = 1;
19811 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19812 XVECEXP (par, 0, 0) = tmp1;
19813 XVECEXP (par, 0, 1) = tmp2;
19814 emit_insn (par);
19815 }
19816
19817 /* Create unwind information. This is an approximation. */
19818 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19819 plus_constant (Pmode,
19820 stack_pointer_rtx,
19821 4 * i)),
19822 reg1);
19823 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19824 plus_constant (Pmode,
19825 stack_pointer_rtx,
19826 4 * (i + 1))),
19827 reg2);
19828
19829 RTX_FRAME_RELATED_P (tmp1) = 1;
19830 RTX_FRAME_RELATED_P (tmp2) = 1;
19831 XVECEXP (dwarf, 0, i + 1) = tmp1;
19832 XVECEXP (dwarf, 0, i + 2) = tmp2;
19833 i += 2;
19834 regno = regno2 + 1;
19835 }
19836 else
19837 regno++;
19838
19839 return;
19840 }
19841
19842 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19843 whenever possible, otherwise it emits single-word stores. The first store
19844 also allocates stack space for all saved registers, using writeback with
19845 pre-indexed addressing. All other stores use offset addressing. If no STRD
19846 can be emitted, this function emits a sequence of single-word stores rather
19847 than an STM, because single-word stores give more scheduling freedom and
19848 can be turned into an STM by peephole optimizations. */
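/* Illustrative sketch, assuming a hypothetical SAVED_REGS_MASK of
   {r4, r5, r6, r7, r8} (20 bytes): the function below would emit roughly

       strd  r4, r5, [sp, #-20]!   @ first pair allocates all 20 bytes
       strd  r6, r7, [sp, #8]
       str   r8, [sp, #16]

   together with a DWARF note describing a single SP decrement of 20 and
   five individual word stores.  */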
19849 static void
19850 arm_emit_strd_push (unsigned long saved_regs_mask)
19851 {
19852 int num_regs = 0;
19853 int i, j, dwarf_index = 0;
19854 int offset = 0;
19855 rtx dwarf = NULL_RTX;
19856 rtx insn = NULL_RTX;
19857 rtx tmp, mem;
19858
19859 /* TODO: More efficient code can be emitted by changing the
19860 layout, e.g., first push all pairs that can use STRD to keep the
19861 stack aligned, and then push all other registers. */
19862 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19863 if (saved_regs_mask & (1 << i))
19864 num_regs++;
19865
19866 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19867 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19868 gcc_assert (num_regs > 0);
19869
19870 /* Create sequence for DWARF info. */
19871 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19872
19873 /* For dwarf info, we generate explicit stack update. */
19874 tmp = gen_rtx_SET (stack_pointer_rtx,
19875 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19876 RTX_FRAME_RELATED_P (tmp) = 1;
19877 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19878
19879 /* Save registers. */
19880 offset = - 4 * num_regs;
19881 j = 0;
19882 while (j <= LAST_ARM_REGNUM)
19883 if (saved_regs_mask & (1 << j))
19884 {
19885 if ((j % 2 == 0)
19886 && (saved_regs_mask & (1 << (j + 1))))
19887 {
19888 /* Current register and the next register form a register pair for
19889 which STRD can be generated. */
19890 if (offset < 0)
19891 {
19892 /* Allocate stack space for all saved registers. */
19893 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19894 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19895 mem = gen_frame_mem (DImode, tmp);
19896 offset = 0;
19897 }
19898 else if (offset > 0)
19899 mem = gen_frame_mem (DImode,
19900 plus_constant (Pmode,
19901 stack_pointer_rtx,
19902 offset));
19903 else
19904 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19905
19906 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19907 RTX_FRAME_RELATED_P (tmp) = 1;
19908 tmp = emit_insn (tmp);
19909
19910 /* Record the first store insn. */
19911 if (dwarf_index == 1)
19912 insn = tmp;
19913
19914 /* Generate dwarf info. */
19915 mem = gen_frame_mem (SImode,
19916 plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 offset));
19919 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19920 RTX_FRAME_RELATED_P (tmp) = 1;
19921 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19922
19923 mem = gen_frame_mem (SImode,
19924 plus_constant (Pmode,
19925 stack_pointer_rtx,
19926 offset + 4));
19927 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19928 RTX_FRAME_RELATED_P (tmp) = 1;
19929 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19930
19931 offset += 8;
19932 j += 2;
19933 }
19934 else
19935 {
19936 /* Emit a single word store. */
19937 if (offset < 0)
19938 {
19939 /* Allocate stack space for all saved registers. */
19940 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19941 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19942 mem = gen_frame_mem (SImode, tmp);
19943 offset = 0;
19944 }
19945 else if (offset > 0)
19946 mem = gen_frame_mem (SImode,
19947 plus_constant (Pmode,
19948 stack_pointer_rtx,
19949 offset));
19950 else
19951 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19952
19953 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19954 RTX_FRAME_RELATED_P (tmp) = 1;
19955 tmp = emit_insn (tmp);
19956
19957 /* Record the first store insn. */
19958 if (dwarf_index == 1)
19959 insn = tmp;
19960
19961 /* Generate dwarf info. */
19962 mem = gen_frame_mem (SImode,
19963 plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 offset));
19966 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19967 RTX_FRAME_RELATED_P (tmp) = 1;
19968 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19969
19970 offset += 4;
19971 j += 1;
19972 }
19973 }
19974 else
19975 j++;
19976
19977 /* Attach dwarf info to the first insn we generate. */
19978 gcc_assert (insn != NULL_RTX);
19979 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19980 RTX_FRAME_RELATED_P (insn) = 1;
19981 }
19982
19983 /* Generate and emit an insn that we will recognize as a push_multi.
19984 Unfortunately, since this insn does not reflect very well the actual
19985 semantics of the operation, we need to annotate the insn for the benefit
19986 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19987 MASK for registers that should be annotated for DWARF2 frame unwind
19988 information. */
19989 static rtx
19990 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19991 {
19992 int num_regs = 0;
19993 int num_dwarf_regs = 0;
19994 int i, j;
19995 rtx par;
19996 rtx dwarf;
19997 int dwarf_par_index;
19998 rtx tmp, reg;
19999
20000 /* We don't record the PC in the dwarf frame information. */
20001 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20002
20003 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20004 {
20005 if (mask & (1 << i))
20006 num_regs++;
20007 if (dwarf_regs_mask & (1 << i))
20008 num_dwarf_regs++;
20009 }
20010
20011 gcc_assert (num_regs && num_regs <= 16);
20012 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20013
20014 /* For the body of the insn we are going to generate an UNSPEC in
20015 parallel with several USEs. This allows the insn to be recognized
20016 by the push_multi pattern in the arm.md file.
20017
20018 The body of the insn looks something like this:
20019
20020 (parallel [
20021 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20022 (const_int:SI <num>)))
20023 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20024 (use (reg:SI XX))
20025 (use (reg:SI YY))
20026 ...
20027 ])
20028
20029 For the frame note however, we try to be more explicit and actually
20030 show each register being stored into the stack frame, plus a (single)
20031 decrement of the stack pointer. We do it this way in order to be
20032 friendly to the stack unwinding code, which only wants to see a single
20033 stack decrement per instruction. The RTL we generate for the note looks
20034 something like this:
20035
20036 (sequence [
20037 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20038 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20039 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20040 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20041 ...
20042 ])
20043
20044 FIXME: In an ideal world the PRE_MODIFY would not exist and
20045 instead we'd have a parallel expression detailing all
20046 the stores to the various memory addresses so that debug
20047 information is more up-to-date. Remember however while writing
20048 this to take care of the constraints with the push instruction.
20049
20050 Note also that this has to be taken care of for the VFP registers.
20051
20052 For more see PR43399. */
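/* As a concrete illustration (hypothetical values, not from the sources):
   for MASK == DWARF_REGS_MASK == {r4, r5, lr} the insn built below is
   recognized as "push {r4, r5, lr}", while the attached note describes
   sp = sp - 12, [sp] = r4, [sp + 4] = r5 and [sp + 8] = lr.  */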
20053
20054 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20055 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20056 dwarf_par_index = 1;
20057
20058 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20059 {
20060 if (mask & (1 << i))
20061 {
20062 reg = gen_rtx_REG (SImode, i);
20063
20064 XVECEXP (par, 0, 0)
20065 = gen_rtx_SET (gen_frame_mem
20066 (BLKmode,
20067 gen_rtx_PRE_MODIFY (Pmode,
20068 stack_pointer_rtx,
20069 plus_constant
20070 (Pmode, stack_pointer_rtx,
20071 -4 * num_regs))
20072 ),
20073 gen_rtx_UNSPEC (BLKmode,
20074 gen_rtvec (1, reg),
20075 UNSPEC_PUSH_MULT));
20076
20077 if (dwarf_regs_mask & (1 << i))
20078 {
20079 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20080 reg);
20081 RTX_FRAME_RELATED_P (tmp) = 1;
20082 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20083 }
20084
20085 break;
20086 }
20087 }
20088
20089 for (j = 1, i++; j < num_regs; i++)
20090 {
20091 if (mask & (1 << i))
20092 {
20093 reg = gen_rtx_REG (SImode, i);
20094
20095 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20096
20097 if (dwarf_regs_mask & (1 << i))
20098 {
20099 tmp
20100 = gen_rtx_SET (gen_frame_mem
20101 (SImode,
20102 plus_constant (Pmode, stack_pointer_rtx,
20103 4 * j)),
20104 reg);
20105 RTX_FRAME_RELATED_P (tmp) = 1;
20106 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20107 }
20108
20109 j++;
20110 }
20111 }
20112
20113 par = emit_insn (par);
20114
20115 tmp = gen_rtx_SET (stack_pointer_rtx,
20116 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20117 RTX_FRAME_RELATED_P (tmp) = 1;
20118 XVECEXP (dwarf, 0, 0) = tmp;
20119
20120 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20121
20122 return par;
20123 }
20124
20125 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20126 SIZE is the offset to be adjusted.
20127 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20128 static void
20129 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20130 {
20131 rtx dwarf;
20132
20133 RTX_FRAME_RELATED_P (insn) = 1;
20134 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20135 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20136 }
20137
20138 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20139 SAVED_REGS_MASK shows which registers need to be restored.
20140
20141 Unfortunately, since this insn does not reflect very well the actual
20142 semantics of the operation, we need to annotate the insn for the benefit
20143 of DWARF2 frame unwind information. */
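/* Illustrative sketch, for a hypothetical SAVED_REGS_MASK of {r4, r5, pc}:
   the parallel built below is recognized as roughly "pop {r4, r5, pc}"
   (emitted as a jump insn because PC is loaded), and REG_CFA_RESTORE notes
   are attached for r4 and r5 only, since PC is not described in the dwarf
   info.  */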
20144 static void
20145 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20146 {
20147 int num_regs = 0;
20148 int i, j;
20149 rtx par;
20150 rtx dwarf = NULL_RTX;
20151 rtx tmp, reg;
20152 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20153 int offset_adj;
20154 int emit_update;
20155
20156 offset_adj = return_in_pc ? 1 : 0;
20157 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20158 if (saved_regs_mask & (1 << i))
20159 num_regs++;
20160
20161 gcc_assert (num_regs && num_regs <= 16);
20162
20163 /* If SP is in the reglist, then we don't emit the SP update insn. */
20164 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20165
20166 /* The parallel needs to hold num_regs SETs
20167 and one SET for the stack update. */
20168 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20169
20170 if (return_in_pc)
20171 XVECEXP (par, 0, 0) = ret_rtx;
20172
20173 if (emit_update)
20174 {
20175 /* Increment the stack pointer, based on there being
20176 num_regs 4-byte registers to restore. */
20177 tmp = gen_rtx_SET (stack_pointer_rtx,
20178 plus_constant (Pmode,
20179 stack_pointer_rtx,
20180 4 * num_regs));
20181 RTX_FRAME_RELATED_P (tmp) = 1;
20182 XVECEXP (par, 0, offset_adj) = tmp;
20183 }
20184
20185 /* Now restore every reg, which may include PC. */
20186 for (j = 0, i = 0; j < num_regs; i++)
20187 if (saved_regs_mask & (1 << i))
20188 {
20189 reg = gen_rtx_REG (SImode, i);
20190 if ((num_regs == 1) && emit_update && !return_in_pc)
20191 {
20192 /* Emit single load with writeback. */
20193 tmp = gen_frame_mem (SImode,
20194 gen_rtx_POST_INC (Pmode,
20195 stack_pointer_rtx));
20196 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20197 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20198 return;
20199 }
20200
20201 tmp = gen_rtx_SET (reg,
20202 gen_frame_mem
20203 (SImode,
20204 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20205 RTX_FRAME_RELATED_P (tmp) = 1;
20206 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20207
20208 /* We need to maintain a sequence for DWARF info too. As dwarf info
20209 should not have PC, skip PC. */
20210 if (i != PC_REGNUM)
20211 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20212
20213 j++;
20214 }
20215
20216 if (return_in_pc)
20217 par = emit_jump_insn (par);
20218 else
20219 par = emit_insn (par);
20220
20221 REG_NOTES (par) = dwarf;
20222 if (!return_in_pc)
20223 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20224 stack_pointer_rtx, stack_pointer_rtx);
20225 }
20226
20227 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20228 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20229
20230 Unfortunately, since this insn does not reflect very well the actual
20231 semantics of the operation, we need to annotate the insn for the benefit
20232 of DWARF2 frame unwind information. */
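/* Illustrative sketch (hypothetical registers): for a run of four
   D-registers starting at d8 with BASE_REG == SP, the parallel built below
   corresponds roughly to "vldm sp!, {d8-d11}", i.e. one SP increment of 32
   plus four DFmode loads, with REG_CFA_RESTORE notes for d8-d11.  */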
20233 static void
20234 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20235 {
20236 int i, j;
20237 rtx par;
20238 rtx dwarf = NULL_RTX;
20239 rtx tmp, reg;
20240
20241 gcc_assert (num_regs && num_regs <= 32);
20242
20243 /* Workaround ARM10 VFPr1 bug. */
20244 if (num_regs == 2 && !arm_arch6)
20245 {
20246 if (first_reg == 15)
20247 first_reg--;
20248
20249 num_regs++;
20250 }
20251
20252 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20253 there could be up to 32 D-registers to restore.
20254 If there are more than 16 D-registers, make two recursive calls,
20255 each of which emits one pop_multi instruction. */
20256 if (num_regs > 16)
20257 {
20258 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20259 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20260 return;
20261 }
20262
20263 /* The parallel needs to hold num_regs SETs
20264 and one SET for the stack update. */
20265 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20266
20267 /* Increment the stack pointer, based on there being
20268 num_regs 8-byte registers to restore. */
20269 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20270 RTX_FRAME_RELATED_P (tmp) = 1;
20271 XVECEXP (par, 0, 0) = tmp;
20272
20273 /* Now show every reg that will be restored, using a SET for each. */
20274 for (j = 0, i = first_reg; j < num_regs; i += 2)
20275 {
20276 reg = gen_rtx_REG (DFmode, i);
20277
20278 tmp = gen_rtx_SET (reg,
20279 gen_frame_mem
20280 (DFmode,
20281 plus_constant (Pmode, base_reg, 8 * j)));
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 XVECEXP (par, 0, j + 1) = tmp;
20284
20285 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20286
20287 j++;
20288 }
20289
20290 par = emit_insn (par);
20291 REG_NOTES (par) = dwarf;
20292
20293 /* Make sure the cfa isn't left based on IP_REGNUM, to allow unwinding from FP. */
20294 if (REGNO (base_reg) == IP_REGNUM)
20295 {
20296 RTX_FRAME_RELATED_P (par) = 1;
20297 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20298 }
20299 else
20300 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20301 base_reg, base_reg);
20302 }
20303
20304 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20305 even number of registers is being popped, multiple LDRD patterns are created
20306 for all register pairs. If an odd number of registers is popped, the last
20307 register is loaded using an LDR pattern. */
20308 static void
20309 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20310 {
20311 int num_regs = 0;
20312 int i, j;
20313 rtx par = NULL_RTX;
20314 rtx dwarf = NULL_RTX;
20315 rtx tmp, reg, tmp1;
20316 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20317
20318 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20319 if (saved_regs_mask & (1 << i))
20320 num_regs++;
20321
20322 gcc_assert (num_regs && num_regs <= 16);
20323
20324 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20325 to be popped. So, if num_regs is even, now it will become odd,
20326 and we can generate pop with PC. If num_regs is odd, it will be
20327 even now, and ldr with return can be generated for PC. */
20328 if (return_in_pc)
20329 num_regs--;
20330
20331 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20332
20333 /* Var j iterates over all the register numbers to gather the registers in
20334 saved_regs_mask. Var i gives the index of a saved register in the stack
20335 frame. A PARALLEL RTX of a register pair is created here, so that the
20336 pattern for LDRD can be matched. As PC is always the last register to be
20337 popped, and we have already decremented num_regs if PC is included, we
20338 don't have to worry about PC in this loop. */
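/* Illustrative sketch, assuming a hypothetical SAVED_REGS_MASK of
   {r4, r5, r6, r7} and no PC: the loop below emits

       ldrd  r4, r5, [sp]
       ldrd  r6, r7, [sp, #8]

   and the code after the loop then emits "add sp, sp, #16".  */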
20339 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20340 if (saved_regs_mask & (1 << j))
20341 {
20342 /* Create RTX for memory load. */
20343 reg = gen_rtx_REG (SImode, j);
20344 tmp = gen_rtx_SET (reg,
20345 gen_frame_mem (SImode,
20346 plus_constant (Pmode,
20347 stack_pointer_rtx, 4 * i)));
20348 RTX_FRAME_RELATED_P (tmp) = 1;
20349
20350 if (i % 2 == 0)
20351 {
20352 /* When saved-register index (i) is even, the RTX to be emitted is
20353 yet to be created. Hence create it first. The LDRD pattern we
20354 are generating is :
20355 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20356 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20357 where target registers need not be consecutive. */
20358 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20359 dwarf = NULL_RTX;
20360 }
20361
20362 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20363 added as the 0th element and if i is odd, reg_i is added as the 1st
20364 element of the LDRD pattern shown above. */
20365 XVECEXP (par, 0, (i % 2)) = tmp;
20366 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20367
20368 if ((i % 2) == 1)
20369 {
20370 /* When saved-register index (i) is odd, RTXs for both the registers
20371 to be loaded are generated in above given LDRD pattern, and the
20372 pattern can be emitted now. */
20373 par = emit_insn (par);
20374 REG_NOTES (par) = dwarf;
20375 RTX_FRAME_RELATED_P (par) = 1;
20376 }
20377
20378 i++;
20379 }
20380
20381 /* If the number of registers popped is odd AND return_in_pc is false, OR the
20382 number of registers is even AND return_in_pc is true, the last register is
20383 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20384 then LDR with post increment. */
20385
20386 /* Increment the stack pointer, based on there being
20387 num_regs 4-byte registers to restore. */
20388 tmp = gen_rtx_SET (stack_pointer_rtx,
20389 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20390 RTX_FRAME_RELATED_P (tmp) = 1;
20391 tmp = emit_insn (tmp);
20392 if (!return_in_pc)
20393 {
20394 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20395 stack_pointer_rtx, stack_pointer_rtx);
20396 }
20397
20398 dwarf = NULL_RTX;
20399
20400 if (((num_regs % 2) == 1 && !return_in_pc)
20401 || ((num_regs % 2) == 0 && return_in_pc))
20402 {
20403 /* Scan for the single register to be popped. Skip until the saved
20404 register is found. */
20405 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20406
20407 /* Gen LDR with post increment here. */
20408 tmp1 = gen_rtx_MEM (SImode,
20409 gen_rtx_POST_INC (SImode,
20410 stack_pointer_rtx));
20411 set_mem_alias_set (tmp1, get_frame_alias_set ());
20412
20413 reg = gen_rtx_REG (SImode, j);
20414 tmp = gen_rtx_SET (reg, tmp1);
20415 RTX_FRAME_RELATED_P (tmp) = 1;
20416 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20417
20418 if (return_in_pc)
20419 {
20420 /* If return_in_pc, j must be PC_REGNUM. */
20421 gcc_assert (j == PC_REGNUM);
20422 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20423 XVECEXP (par, 0, 0) = ret_rtx;
20424 XVECEXP (par, 0, 1) = tmp;
20425 par = emit_jump_insn (par);
20426 }
20427 else
20428 {
20429 par = emit_insn (tmp);
20430 REG_NOTES (par) = dwarf;
20431 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20432 stack_pointer_rtx, stack_pointer_rtx);
20433 }
20434
20435 }
20436 else if ((num_regs % 2) == 1 && return_in_pc)
20437 {
20438 /* There are 2 registers to be popped. So, generate the pattern
20439 pop_multiple_with_stack_update_and_return to pop in PC. */
20440 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20441 }
20442
20443 return;
20444 }
20445
20446 /* LDRD in ARM mode needs consecutive registers as operands. This function
20447 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20448 offset addressing and then generates one separate stack update. This provides
20449 more scheduling freedom, compared to writeback on every load. However,
20450 if the function returns using load into PC directly
20451 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20452 before the last load. TODO: Add a peephole optimization to recognize
20453 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20454 peephole optimization to merge the load at stack-offset zero
20455 with the stack update instruction using load with writeback
20456 in post-index addressing mode. */
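/* Illustrative sketch, for a hypothetical SAVED_REGS_MASK of {r4, r5, pc}:
   the function below emits roughly

       ldrd  r4, r5, [sp]
       add   sp, sp, #8
       ldr   pc, [sp], #4      @ return via load into PC with post-increment

   with REG_CFA_RESTORE notes for r4, r5 and PC, and REG_CFA_ADJUST_CFA
   notes on the stack updates.  */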
20457 static void
20458 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20459 {
20460 int j = 0;
20461 int offset = 0;
20462 rtx par = NULL_RTX;
20463 rtx dwarf = NULL_RTX;
20464 rtx tmp, mem;
20465
20466 /* Restore saved registers. */
20467 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20468 j = 0;
20469 while (j <= LAST_ARM_REGNUM)
20470 if (saved_regs_mask & (1 << j))
20471 {
20472 if ((j % 2) == 0
20473 && (saved_regs_mask & (1 << (j + 1)))
20474 && (j + 1) != PC_REGNUM)
20475 {
20476 /* Current register and next register form register pair for which
20477 LDRD can be generated. PC is always the last register popped, and
20478 we handle it separately. */
20479 if (offset > 0)
20480 mem = gen_frame_mem (DImode,
20481 plus_constant (Pmode,
20482 stack_pointer_rtx,
20483 offset));
20484 else
20485 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20486
20487 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20488 tmp = emit_insn (tmp);
20489 RTX_FRAME_RELATED_P (tmp) = 1;
20490
20491 /* Generate dwarf info. */
20492
20493 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20494 gen_rtx_REG (SImode, j),
20495 NULL_RTX);
20496 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20497 gen_rtx_REG (SImode, j + 1),
20498 dwarf);
20499
20500 REG_NOTES (tmp) = dwarf;
20501
20502 offset += 8;
20503 j += 2;
20504 }
20505 else if (j != PC_REGNUM)
20506 {
20507 /* Emit a single word load. */
20508 if (offset > 0)
20509 mem = gen_frame_mem (SImode,
20510 plus_constant (Pmode,
20511 stack_pointer_rtx,
20512 offset));
20513 else
20514 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20515
20516 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20517 tmp = emit_insn (tmp);
20518 RTX_FRAME_RELATED_P (tmp) = 1;
20519
20520 /* Generate dwarf info. */
20521 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20522 gen_rtx_REG (SImode, j),
20523 NULL_RTX);
20524
20525 offset += 4;
20526 j += 1;
20527 }
20528 else /* j == PC_REGNUM */
20529 j++;
20530 }
20531 else
20532 j++;
20533
20534 /* Update the stack. */
20535 if (offset > 0)
20536 {
20537 tmp = gen_rtx_SET (stack_pointer_rtx,
20538 plus_constant (Pmode,
20539 stack_pointer_rtx,
20540 offset));
20541 tmp = emit_insn (tmp);
20542 arm_add_cfa_adjust_cfa_note (tmp, offset,
20543 stack_pointer_rtx, stack_pointer_rtx);
20544 offset = 0;
20545 }
20546
20547 if (saved_regs_mask & (1 << PC_REGNUM))
20548 {
20549 /* Only PC is to be popped. */
20550 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20551 XVECEXP (par, 0, 0) = ret_rtx;
20552 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20553 gen_frame_mem (SImode,
20554 gen_rtx_POST_INC (SImode,
20555 stack_pointer_rtx)));
20556 RTX_FRAME_RELATED_P (tmp) = 1;
20557 XVECEXP (par, 0, 1) = tmp;
20558 par = emit_jump_insn (par);
20559
20560 /* Generate dwarf info. */
20561 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20562 gen_rtx_REG (SImode, PC_REGNUM),
20563 NULL_RTX);
20564 REG_NOTES (par) = dwarf;
20565 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20566 stack_pointer_rtx, stack_pointer_rtx);
20567 }
20568 }
20569
20570 /* Calculate the size of the return value that is passed in registers. */
20571 static unsigned
20572 arm_size_return_regs (void)
20573 {
20574 machine_mode mode;
20575
20576 if (crtl->return_rtx != 0)
20577 mode = GET_MODE (crtl->return_rtx);
20578 else
20579 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20580
20581 return GET_MODE_SIZE (mode);
20582 }
20583
20584 /* Return true if the current function needs to save/restore LR. */
20585 static bool
20586 thumb_force_lr_save (void)
20587 {
20588 return !cfun->machine->lr_save_eliminated
20589 && (!crtl->is_leaf
20590 || thumb_far_jump_used_p ()
20591 || df_regs_ever_live_p (LR_REGNUM));
20592 }
20593
20594 /* Return true if CALL is an indirect tail call. If it is, we do not
20595 know whether r3 will be available to the callee, so we must assume
20596 it could be used. */
20597 static bool
20598 is_indirect_tailcall_p (rtx call)
20599 {
20600 rtx pat = PATTERN (call);
20601
20602 /* Indirect tail call. */
20603 pat = XVECEXP (pat, 0, 0);
20604 if (GET_CODE (pat) == SET)
20605 pat = SET_SRC (pat);
20606
20607 pat = XEXP (XEXP (pat, 0), 0);
20608 return REG_P (pat);
20609 }
20610
20611 /* Return true if r3 is used by any of the tail call insns in the
20612 current function. */
20613 static bool
20614 any_sibcall_could_use_r3 (void)
20615 {
20616 edge_iterator ei;
20617 edge e;
20618
20619 if (!crtl->tail_call_emit)
20620 return false;
20621 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20622 if (e->flags & EDGE_SIBCALL)
20623 {
20624 rtx_insn *call = BB_END (e->src);
20625 if (!CALL_P (call))
20626 call = prev_nonnote_nondebug_insn (call);
20627 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20628 if (find_regno_fusage (call, USE, 3)
20629 || is_indirect_tailcall_p (call))
20630 return true;
20631 }
20632 return false;
20633 }
20634
20635
20636 /* Compute the distance from register FROM to register TO.
20637 These can be the arg pointer (26), the soft frame pointer (25),
20638 the stack pointer (13) or the hard frame pointer (11).
20639 In thumb mode r7 is used as the hard frame pointer, if needed.
20640 Typical stack layout looks like this:
20641
20642 old stack pointer -> | |
20643 ----
20644 | | \
20645 | | saved arguments for
20646 | | vararg functions
20647 | | /
20648 --
20649 hard FP & arg pointer -> | | \
20650 | | stack
20651 | | frame
20652 | | /
20653 --
20654 | | \
20655 | | call saved
20656 | | registers
20657 soft frame pointer -> | | /
20658 --
20659 | | \
20660 | | local
20661 | | variables
20662 locals base pointer -> | | /
20663 --
20664 | | \
20665 | | outgoing
20666 | | arguments
20667 current stack pointer -> | | /
20668 --
20669
20670 For a given function some or all of these stack components
20671 may not be needed, giving rise to the possibility of
20672 eliminating some of the registers.
20673
20674 The values returned by this function must reflect the behavior
20675 of arm_expand_prologue() and arm_compute_save_reg_mask().
20676
20677 The sign of the number returned reflects the direction of stack
20678 growth, so the values are positive for all eliminations except
20679 from the soft frame pointer to the hard frame pointer.
20680
20681 SFP may point just inside the local variables block to ensure correct
20682 alignment. */
20683
20684
20685 /* Calculate stack offsets. These are used to calculate register elimination
20686 offsets and in prologue/epilogue code. Also calculates which registers
20687 should be saved. */
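/* Illustrative example (hypothetical numbers): for an ARM-mode function
   with 8 bytes of locals, no outgoing arguments, no static chain, no
   frame pointer and {r4, lr} to save, the function below would compute
   saved_args == 0, frame == 0, saved_regs == 8, soft_frame == 8
   (assuming no caller-interworking slot), locals_base == 16 and
   outgoing_args == 16.  */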
20688
20689 static arm_stack_offsets *
20690 arm_get_frame_offsets (void)
20691 {
20692 struct arm_stack_offsets *offsets;
20693 unsigned long func_type;
20694 int saved;
20695 int core_saved;
20696 HOST_WIDE_INT frame_size;
20697 int i;
20698
20699 offsets = &cfun->machine->stack_offsets;
20700
20701 if (reload_completed)
20702 return offsets;
20703
20704 /* Initially this is the size of the local variables. It will be translated
20705 into an offset once we have determined the size of preceding data. */
20706 frame_size = ROUND_UP_WORD (get_frame_size ());
20707
20708 /* Space for variadic functions. */
20709 offsets->saved_args = crtl->args.pretend_args_size;
20710
20711 /* In Thumb mode this is incorrect, but never used. */
20712 offsets->frame
20713 = (offsets->saved_args
20714 + arm_compute_static_chain_stack_bytes ()
20715 + (frame_pointer_needed ? 4 : 0));
20716
20717 if (TARGET_32BIT)
20718 {
20719 unsigned int regno;
20720
20721 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20722 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20723 saved = core_saved;
20724
20725 /* We know that SP will be doubleword aligned on entry, and we must
20726 preserve that condition at any subroutine call. We also require the
20727 soft frame pointer to be doubleword aligned. */
20728
20729 if (TARGET_REALLY_IWMMXT)
20730 {
20731 /* Check for the call-saved iWMMXt registers. */
20732 for (regno = FIRST_IWMMXT_REGNUM;
20733 regno <= LAST_IWMMXT_REGNUM;
20734 regno++)
20735 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20736 saved += 8;
20737 }
20738
20739 func_type = arm_current_func_type ();
20740 /* Space for saved VFP registers. */
20741 if (! IS_VOLATILE (func_type)
20742 && TARGET_HARD_FLOAT)
20743 saved += arm_get_vfp_saved_size ();
20744 }
20745 else /* TARGET_THUMB1 */
20746 {
20747 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20748 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20749 saved = core_saved;
20750 if (TARGET_BACKTRACE)
20751 saved += 16;
20752 }
20753
20754 /* Saved registers include the stack frame. */
20755 offsets->saved_regs
20756 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20757 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20758
20759 /* A leaf function does not need any stack alignment if it has nothing
20760 on the stack. */
20761 if (crtl->is_leaf && frame_size == 0
20762 /* However if it calls alloca(), we have a dynamically allocated
20763 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20764 && ! cfun->calls_alloca)
20765 {
20766 offsets->outgoing_args = offsets->soft_frame;
20767 offsets->locals_base = offsets->soft_frame;
20768 return offsets;
20769 }
20770
20771 /* Ensure SFP has the correct alignment. */
20772 if (ARM_DOUBLEWORD_ALIGN
20773 && (offsets->soft_frame & 7))
20774 {
20775 offsets->soft_frame += 4;
20776 /* Try to align stack by pushing an extra reg. Don't bother doing this
20777 when there is a stack frame as the alignment will be rolled into
20778 the normal stack adjustment. */
20779 if (frame_size + crtl->outgoing_args_size == 0)
20780 {
20781 int reg = -1;
20782
20783 /* Register r3 is caller-saved. Normally it does not need to be
20784 saved on entry by the prologue. However if we choose to save
20785 it for padding then we may confuse the compiler into thinking
20786 a prologue sequence is required when in fact it is not. This
20787 will occur when shrink-wrapping if r3 is used as a scratch
20788 register and there are no other callee-saved writes.
20789
20790 This situation can be avoided when other callee-saved registers
20791 are available: r3 is not mandatory, and we can choose a
20792 callee-saved register for the padding instead. */
20793 bool prefer_callee_reg_p = false;
20794
20795 /* If it is safe to use r3, then do so. This sometimes
20796 generates better code on Thumb-2 by avoiding the need to
20797 use 32-bit push/pop instructions. */
20798 if (! any_sibcall_could_use_r3 ()
20799 && arm_size_return_regs () <= 12
20800 && (offsets->saved_regs_mask & (1 << 3)) == 0
20801 && (TARGET_THUMB2
20802 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20803 {
20804 reg = 3;
20805 if (!TARGET_THUMB2)
20806 prefer_callee_reg_p = true;
20807 }
20808 if (reg == -1
20809 || prefer_callee_reg_p)
20810 {
20811 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20812 {
20813 /* Avoid fixed registers; they may be changed at
20814 arbitrary times so it's unsafe to restore them
20815 during the epilogue. */
20816 if (!fixed_regs[i]
20817 && (offsets->saved_regs_mask & (1 << i)) == 0)
20818 {
20819 reg = i;
20820 break;
20821 }
20822 }
20823 }
20824
20825 if (reg != -1)
20826 {
20827 offsets->saved_regs += 4;
20828 offsets->saved_regs_mask |= (1 << reg);
20829 }
20830 }
20831 }
20832
20833 offsets->locals_base = offsets->soft_frame + frame_size;
20834 offsets->outgoing_args = (offsets->locals_base
20835 + crtl->outgoing_args_size);
20836
20837 if (ARM_DOUBLEWORD_ALIGN)
20838 {
20839 /* Ensure SP remains doubleword aligned. */
20840 if (offsets->outgoing_args & 7)
20841 offsets->outgoing_args += 4;
20842 gcc_assert (!(offsets->outgoing_args & 7));
20843 }
20844
20845 return offsets;
20846 }
20847
20848
20849 /* Calculate the relative offsets for the different stack pointers. Positive
20850 offsets are in the direction of stack growth. */
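/* Illustrative example, reusing hypothetical offsets of saved_args == 0,
   frame == 0, soft_frame == 8 and outgoing_args == 16: the eliminations
   below would give ARG_POINTER -> STACK_POINTER == 16 - (0 + 4) == 12 and
   FRAME_POINTER -> STACK_POINTER == 16 - 8 == 8.  */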
20851
20852 HOST_WIDE_INT
20853 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20854 {
20855 arm_stack_offsets *offsets;
20856
20857 offsets = arm_get_frame_offsets ();
20858
20859 /* OK, now we have enough information to compute the distances.
20860 There must be an entry in these switch tables for each pair
20861 of registers in ELIMINABLE_REGS, even if some of the entries
20862 seem to be redundant or useless. */
20863 switch (from)
20864 {
20865 case ARG_POINTER_REGNUM:
20866 switch (to)
20867 {
20868 case THUMB_HARD_FRAME_POINTER_REGNUM:
20869 return 0;
20870
20871 case FRAME_POINTER_REGNUM:
20872 /* This is the reverse of the soft frame pointer
20873 to hard frame pointer elimination below. */
20874 return offsets->soft_frame - offsets->saved_args;
20875
20876 case ARM_HARD_FRAME_POINTER_REGNUM:
20877 /* This is only non-zero in the case where the static chain register
20878 is stored above the frame. */
20879 return offsets->frame - offsets->saved_args - 4;
20880
20881 case STACK_POINTER_REGNUM:
20882 /* If nothing has been pushed on the stack at all
20883 then this will return -4. This *is* correct! */
20884 return offsets->outgoing_args - (offsets->saved_args + 4);
20885
20886 default:
20887 gcc_unreachable ();
20888 }
20889 gcc_unreachable ();
20890
20891 case FRAME_POINTER_REGNUM:
20892 switch (to)
20893 {
20894 case THUMB_HARD_FRAME_POINTER_REGNUM:
20895 return 0;
20896
20897 case ARM_HARD_FRAME_POINTER_REGNUM:
20898 /* The hard frame pointer points to the top entry in the
20899 stack frame. The soft frame pointer points to the bottom entry
20900 in the stack frame. If there is no stack frame at all,
20901 then they are identical. */
20902
20903 return offsets->frame - offsets->soft_frame;
20904
20905 case STACK_POINTER_REGNUM:
20906 return offsets->outgoing_args - offsets->soft_frame;
20907
20908 default:
20909 gcc_unreachable ();
20910 }
20911 gcc_unreachable ();
20912
20913 default:
20914 /* You cannot eliminate from the stack pointer.
20915 In theory you could eliminate from the hard frame
20916 pointer to the stack pointer, but this will never
20917 happen, since if a stack frame is not needed the
20918 hard frame pointer will never be used. */
20919 gcc_unreachable ();
20920 }
20921 }
20922
20923 /* Given FROM and TO register numbers, say whether this elimination is
20924 allowed. Frame pointer elimination is automatically handled.
20925
20926 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20927 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20928 pointer, we must eliminate FRAME_POINTER_REGNUM into
20929 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20930 ARG_POINTER_REGNUM. */
20931
20932 bool
20933 arm_can_eliminate (const int from, const int to)
20934 {
20935 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20936 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20937 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20938 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20939 true);
20940 }
20941
20942 /* Emit RTL to save coprocessor registers on function entry. Returns the
20943 number of bytes pushed. */
20944
20945 static int
20946 arm_save_coproc_regs (void)
20947 {
20948 int saved_size = 0;
20949 unsigned reg;
20950 unsigned start_reg;
20951 rtx insn;
20952
20953 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20954 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20955 {
20956 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20957 insn = gen_rtx_MEM (V2SImode, insn);
20958 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20959 RTX_FRAME_RELATED_P (insn) = 1;
20960 saved_size += 8;
20961 }
20962
20963 if (TARGET_HARD_FLOAT)
20964 {
20965 start_reg = FIRST_VFP_REGNUM;
20966
20967 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20968 {
20969 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20970 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20971 {
20972 if (start_reg != reg)
20973 saved_size += vfp_emit_fstmd (start_reg,
20974 (reg - start_reg) / 2);
20975 start_reg = reg + 2;
20976 }
20977 }
20978 if (start_reg != reg)
20979 saved_size += vfp_emit_fstmd (start_reg,
20980 (reg - start_reg) / 2);
20981 }
20982 return saved_size;
20983 }
20984
20985
20986 /* Set the Thumb frame pointer from the stack pointer. */
20987
20988 static void
20989 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20990 {
20991 HOST_WIDE_INT amount;
20992 rtx insn, dwarf;
20993
20994 amount = offsets->outgoing_args - offsets->locals_base;
20995 if (amount < 1024)
20996 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20997 stack_pointer_rtx, GEN_INT (amount)));
20998 else
20999 {
21000 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21001 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21002 expects the first two operands to be the same. */
21003 if (TARGET_THUMB2)
21004 {
21005 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21006 stack_pointer_rtx,
21007 hard_frame_pointer_rtx));
21008 }
21009 else
21010 {
21011 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21012 hard_frame_pointer_rtx,
21013 stack_pointer_rtx));
21014 }
21015 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21016 plus_constant (Pmode, stack_pointer_rtx, amount));
21017 RTX_FRAME_RELATED_P (dwarf) = 1;
21018 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21019 }
21020
21021 RTX_FRAME_RELATED_P (insn) = 1;
21022 }
21023
21024 struct scratch_reg {
21025 rtx reg;
21026 bool saved;
21027 };
21028
21029 /* Return a short-lived scratch register for use as a 2nd scratch register on
21030 function entry after the registers are saved in the prologue. This register
21031 must be released by means of release_scratch_register_on_entry. IP is not
21032 considered since it is always used as the 1st scratch register if available.
21033
21034 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21035 mask of live registers. */
21036
21037 static void
21038 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21039 unsigned long live_regs)
21040 {
21041 int regno = -1;
21042
21043 sr->saved = false;
21044
21045 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21046 regno = LR_REGNUM;
21047 else
21048 {
21049 unsigned int i;
21050
21051 for (i = 4; i < 11; i++)
21052 if (regno1 != i && (live_regs & (1 << i)) != 0)
21053 {
21054 regno = i;
21055 break;
21056 }
21057
21058 if (regno < 0)
21059 {
21060 /* If IP is used as the 1st scratch register for a nested function,
21061 then either r3 wasn't available or is used to preserve IP. */
21062 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21063 regno1 = 3;
21064 regno = (regno1 == 3 ? 2 : 3);
21065 sr->saved
21066 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21067 regno);
21068 }
21069 }
21070
21071 sr->reg = gen_rtx_REG (SImode, regno);
21072 if (sr->saved)
21073 {
21074 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21075 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21076 rtx x = gen_rtx_SET (stack_pointer_rtx,
21077 plus_constant (Pmode, stack_pointer_rtx, -4));
21078 RTX_FRAME_RELATED_P (insn) = 1;
21079 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21080 }
21081 }
21082
21083 /* Release a scratch register obtained from the preceding function. */
21084
21085 static void
21086 release_scratch_register_on_entry (struct scratch_reg *sr)
21087 {
21088 if (sr->saved)
21089 {
21090 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21091 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21092 rtx x = gen_rtx_SET (stack_pointer_rtx,
21093 plus_constant (Pmode, stack_pointer_rtx, 4));
21094 RTX_FRAME_RELATED_P (insn) = 1;
21095 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21096 }
21097 }
21098
21099 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21100
21101 #if PROBE_INTERVAL > 4096
21102 #error Cannot use indexed addressing mode for stack probing
21103 #endif
21104
21105 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21106 inclusive. These are offsets from the current stack pointer. REGNO1
21107 is the index number of the 1st scratch register and LIVE_REGS is the
21108 mask of live registers. */
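/* Illustrative example (hypothetical sizes): with PROBE_INTERVAL == 4096,
   FIRST == 4096 and SIZE == 3 * 4096 + 100, the second branch below probes
   at SP - 8192, SP - 12288, SP - 16384 and finally SP - 16484, i.e. at
   FIRST + N * PROBE_INTERVAL for N = 1..3 and then at FIRST + SIZE.  */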
21109
21110 static void
21111 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21112 unsigned int regno1, unsigned long live_regs)
21113 {
21114 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21115
21116 /* See if we have a constant small number of probes to generate. If so,
21117 that's the easy case. */
21118 if (size <= PROBE_INTERVAL)
21119 {
21120 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21121 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21122 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21123 }
21124
21125 /* The run-time loop is made up of 10 insns in the generic case while the
21126 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21127 else if (size <= 5 * PROBE_INTERVAL)
21128 {
21129 HOST_WIDE_INT i, rem;
21130
21131 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21132 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21133 emit_stack_probe (reg1);
21134
21135 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21136 it exceeds SIZE. If only two probes are needed, this will not
21137 generate any code. Then probe at FIRST + SIZE. */
21138 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21139 {
21140 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21141 emit_stack_probe (reg1);
21142 }
21143
21144 rem = size - (i - PROBE_INTERVAL);
21145 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21146 {
21147 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21148 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21149 }
21150 else
21151 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21152 }
21153
21154 /* Otherwise, do the same as above, but in a loop. Note that we must be
21155 extra careful with variables wrapping around because we might be at
21156 the very top (or the very bottom) of the address space and we have
21157 to be able to handle this case properly; in particular, we use an
21158 equality test for the loop condition. */
21159 else
21160 {
21161 HOST_WIDE_INT rounded_size;
21162 struct scratch_reg sr;
21163
21164 get_scratch_register_on_entry (&sr, regno1, live_regs);
21165
21166 emit_move_insn (reg1, GEN_INT (first));
21167
21168
21169 /* Step 1: round SIZE to the previous multiple of the interval. */
21170
21171 rounded_size = size & -PROBE_INTERVAL;
21172 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21173
21174
21175 /* Step 2: compute initial and final value of the loop counter. */
21176
21177 /* TEST_ADDR = SP + FIRST. */
21178 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21179
21180 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21181 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21182
21183
21184 /* Step 3: the loop
21185
21186 do
21187 {
21188 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21189 probe at TEST_ADDR
21190 }
21191 while (TEST_ADDR != LAST_ADDR)
21192
21193 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21194 until it is equal to ROUNDED_SIZE. */
21195
21196 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21197
21198
21199 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21200 that SIZE is equal to ROUNDED_SIZE. */
21201
21202 if (size != rounded_size)
21203 {
21204 HOST_WIDE_INT rem = size - rounded_size;
21205
21206 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21207 {
21208 emit_set_insn (sr.reg,
21209 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21210 emit_stack_probe (plus_constant (Pmode, sr.reg,
21211 PROBE_INTERVAL - rem));
21212 }
21213 else
21214 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21215 }
21216
21217 release_scratch_register_on_entry (&sr);
21218 }
21219
21220 /* Make sure nothing is scheduled before we are done. */
21221 emit_insn (gen_blockage ());
21222 }
21223
21224 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21225 absolute addresses. */
21226
21227 const char *
21228 output_probe_stack_range (rtx reg1, rtx reg2)
21229 {
21230 static int labelno = 0;
21231 char loop_lab[32];
21232 rtx xops[2];
21233
21234 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21235
21236 /* Loop. */
21237 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21238
21239 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21240 xops[0] = reg1;
21241 xops[1] = GEN_INT (PROBE_INTERVAL);
21242 output_asm_insn ("sub\t%0, %0, %1", xops);
21243
21244 /* Probe at TEST_ADDR. */
21245 output_asm_insn ("str\tr0, [%0, #0]", xops);
21246
21247 /* Test if TEST_ADDR == LAST_ADDR. */
21248 xops[1] = reg2;
21249 output_asm_insn ("cmp\t%0, %1", xops);
21250
21251 /* Branch. */
21252 fputs ("\tbne\t", asm_out_file);
21253 assemble_name_raw (asm_out_file, loop_lab);
21254 fputc ('\n', asm_out_file);
21255
21256 return "";
21257 }
21258
21259 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21260 function. */
21261 void
21262 arm_expand_prologue (void)
21263 {
21264 rtx amount;
21265 rtx insn;
21266 rtx ip_rtx;
21267 unsigned long live_regs_mask;
21268 unsigned long func_type;
21269 int fp_offset = 0;
21270 int saved_pretend_args = 0;
21271 int saved_regs = 0;
21272 unsigned HOST_WIDE_INT args_to_push;
21273 HOST_WIDE_INT size;
21274 arm_stack_offsets *offsets;
21275 bool clobber_ip;
21276
21277 func_type = arm_current_func_type ();
21278
21279 /* Naked functions don't have prologues. */
21280 if (IS_NAKED (func_type))
21281 {
21282 if (flag_stack_usage_info)
21283 current_function_static_stack_size = 0;
21284 return;
21285 }
21286
21287 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21288 args_to_push = crtl->args.pretend_args_size;
21289
21290 /* Compute which register we will have to save onto the stack. */
21291 offsets = arm_get_frame_offsets ();
21292 live_regs_mask = offsets->saved_regs_mask;
21293
21294 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21295
21296 if (IS_STACKALIGN (func_type))
21297 {
21298 rtx r0, r1;
21299
21300 /* Handle a word-aligned stack pointer. We generate the following:
21301
21302 mov r0, sp
21303 bic r1, r0, #7
21304 mov sp, r1
21305 <save and restore r0 in normal prologue/epilogue>
21306 mov sp, r0
21307 bx lr
21308
21309 The unwinder doesn't need to know about the stack realignment.
21310 Just tell it we saved SP in r0. */
21311 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21312
21313 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21314 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21315
21316 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21317 RTX_FRAME_RELATED_P (insn) = 1;
21318 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21319
21320 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21321
21322 /* ??? The CFA changes here, which may cause GDB to conclude that it
21323 has entered a different function. That said, the unwind info is
21324 correct, individually, before and after this instruction because
21325 we've described the save of SP, which will override the default
21326 handling of SP as restoring from the CFA. */
21327 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21328 }
21329
21330 /* The static chain register is the same as the IP register. If it is
21331 clobbered when creating the frame, we need to save and restore it. */
21332 clobber_ip = IS_NESTED (func_type)
21333 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21334 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21335 && !df_regs_ever_live_p (LR_REGNUM)
21336 && arm_r3_live_at_start_p ()));
21337
21338 /* Find somewhere to store IP whilst the frame is being created.
21339 We try the following places in order:
21340
21341 1. The last argument register r3 if it is available.
21342 2. A slot on the stack above the frame if there are no
21343 arguments to push onto the stack.
21344 3. Register r3 again, after pushing the argument registers
21345 onto the stack, if this is a varargs function.
21346 4. The last slot on the stack created for the arguments to
21347 push, if this isn't a varargs function.
21348
21349 Note - we only need to tell the dwarf2 backend about the SP
21350 adjustment in the second variant; the static chain register
21351 doesn't need to be unwound, as it doesn't contain a value
21352 inherited from the caller. */
21353 if (clobber_ip)
21354 {
21355 if (!arm_r3_live_at_start_p ())
21356 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21357 else if (args_to_push == 0)
21358 {
21359 rtx addr, dwarf;
21360
21361 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21362 saved_regs += 4;
21363
21364 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21365 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21366 fp_offset = 4;
21367
21368 /* Just tell the dwarf backend that we adjusted SP. */
21369 dwarf = gen_rtx_SET (stack_pointer_rtx,
21370 plus_constant (Pmode, stack_pointer_rtx,
21371 -fp_offset));
21372 RTX_FRAME_RELATED_P (insn) = 1;
21373 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21374 }
21375 else
21376 {
21377 /* Store the args on the stack. */
21378 if (cfun->machine->uses_anonymous_args)
21379 {
21380 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21381 (0xf0 >> (args_to_push / 4)) & 0xf);
21382 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21383 saved_pretend_args = 1;
21384 }
21385 else
21386 {
21387 rtx addr, dwarf;
21388
21389 if (args_to_push == 4)
21390 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21391 else
21392 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21393 plus_constant (Pmode,
21394 stack_pointer_rtx,
21395 -args_to_push));
21396
21397 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21398
21399 /* Just tell the dwarf backend that we adjusted SP. */
21400 dwarf = gen_rtx_SET (stack_pointer_rtx,
21401 plus_constant (Pmode, stack_pointer_rtx,
21402 -args_to_push));
21403 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21404 }
21405
21406 RTX_FRAME_RELATED_P (insn) = 1;
21407 fp_offset = args_to_push;
21408 args_to_push = 0;
21409 }
21410 }
21411
21412 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21413 {
21414 if (IS_INTERRUPT (func_type))
21415 {
21416 /* Interrupt functions must not corrupt any registers.
21417 Creating a frame pointer however, corrupts the IP
21418 register, so we must push it first. */
21419 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21420
21421 /* Do not set RTX_FRAME_RELATED_P on this insn.
21422 The dwarf stack unwinding code only wants to see one
21423 stack decrement per function, and this is not it. If
21424 this instruction is labeled as being part of the frame
21425 creation sequence then dwarf2out_frame_debug_expr will
21426 die when it encounters the assignment of IP to FP
21427 later on, since the use of SP here establishes SP as
21428 the CFA register and not IP.
21429
21430 Anyway this instruction is not really part of the stack
21431 frame creation although it is part of the prologue. */
21432 }
21433
21434 insn = emit_set_insn (ip_rtx,
21435 plus_constant (Pmode, stack_pointer_rtx,
21436 fp_offset));
21437 RTX_FRAME_RELATED_P (insn) = 1;
21438 }
21439
21440 if (args_to_push)
21441 {
21442 /* Push the argument registers, or reserve space for them. */
21443 if (cfun->machine->uses_anonymous_args)
21444 insn = emit_multi_reg_push
21445 ((0xf0 >> (args_to_push / 4)) & 0xf,
21446 (0xf0 >> (args_to_push / 4)) & 0xf);
21447 else
21448 insn = emit_insn
21449 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21450 GEN_INT (- args_to_push)));
21451 RTX_FRAME_RELATED_P (insn) = 1;
21452 }
21453
21454 /* If this is an interrupt service routine, and the link register
21455 is going to be pushed, and we're not generating an extra push of IP
21456 (needed when a frame pointer is needed and the frame layout is APCS),
21457 subtracting four from LR now will mean that the function return
21458 can be done with a single instruction. */
21459 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21460 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21461 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21462 && TARGET_ARM)
21463 {
21464 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21465
21466 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21467 }
21468
21469 if (live_regs_mask)
21470 {
21471 unsigned long dwarf_regs_mask = live_regs_mask;
21472
21473 saved_regs += bit_count (live_regs_mask) * 4;
21474 if (optimize_size && !frame_pointer_needed
21475 && saved_regs == offsets->saved_regs - offsets->saved_args)
21476 {
21477 /* If no coprocessor registers are being pushed and we don't have
21478 to worry about a frame pointer then push extra registers to
21479 create the stack frame. This is done in a way that does not
21480 alter the frame layout, so it is independent of the epilogue. */
21481 int n;
21482 int frame;
21483 n = 0;
21484 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21485 n++;
21486 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21487 if (frame && n * 4 >= frame)
21488 {
21489 n = frame / 4;
21490 live_regs_mask |= (1 << n) - 1;
21491 saved_regs += frame;
21492 }
21493 }
21494
21495 if (TARGET_LDRD
21496 && current_tune->prefer_ldrd_strd
21497 && !optimize_function_for_size_p (cfun))
21498 {
21499 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21500 if (TARGET_THUMB2)
21501 thumb2_emit_strd_push (live_regs_mask);
21502 else if (TARGET_ARM
21503 && !TARGET_APCS_FRAME
21504 && !IS_INTERRUPT (func_type))
21505 arm_emit_strd_push (live_regs_mask);
21506 else
21507 {
21508 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21509 RTX_FRAME_RELATED_P (insn) = 1;
21510 }
21511 }
21512 else
21513 {
21514 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21515 RTX_FRAME_RELATED_P (insn) = 1;
21516 }
21517 }
21518
21519 if (! IS_VOLATILE (func_type))
21520 saved_regs += arm_save_coproc_regs ();
21521
21522 if (frame_pointer_needed && TARGET_ARM)
21523 {
21524 /* Create the new frame pointer. */
21525 if (TARGET_APCS_FRAME)
21526 {
21527 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21528 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21529 RTX_FRAME_RELATED_P (insn) = 1;
21530 }
21531 else
21532 {
21533 insn = GEN_INT (saved_regs - (4 + fp_offset));
21534 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21535 stack_pointer_rtx, insn));
21536 RTX_FRAME_RELATED_P (insn) = 1;
21537 }
21538 }
21539
21540 size = offsets->outgoing_args - offsets->saved_args;
21541 if (flag_stack_usage_info)
21542 current_function_static_stack_size = size;
21543
21544 /* If this isn't an interrupt service routine and we have a frame, then do
21545 stack checking. We use IP as the first scratch register, except for the
21546 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21547 if (!IS_INTERRUPT (func_type)
21548 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21549 {
21550 unsigned int regno;
21551
21552 if (!IS_NESTED (func_type) || clobber_ip)
21553 regno = IP_REGNUM;
21554 else if (df_regs_ever_live_p (LR_REGNUM))
21555 regno = LR_REGNUM;
21556 else
21557 regno = 3;
21558
21559 if (crtl->is_leaf && !cfun->calls_alloca)
21560 {
21561 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21562 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21563 size - STACK_CHECK_PROTECT,
21564 regno, live_regs_mask);
21565 }
21566 else if (size > 0)
21567 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21568 regno, live_regs_mask);
21569 }
21570
21571 /* Recover the static chain register. */
21572 if (clobber_ip)
21573 {
21574 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21575 insn = gen_rtx_REG (SImode, 3);
21576 else
21577 {
21578 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21579 insn = gen_frame_mem (SImode, insn);
21580 }
21581 emit_set_insn (ip_rtx, insn);
21582 emit_insn (gen_force_register_use (ip_rtx));
21583 }
21584
21585 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21586 {
21587 /* This add can produce multiple insns for a large constant, so we
21588 need to get tricky. */
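      /* For example (illustrative), allocating 4100 bytes of outgoing
	 stack may expand into "sub sp, sp, #4096" followed by
	 "sub sp, sp, #4"; the loop below marks every insn emitted for
	 this adjustment as frame-related.  */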
21589 rtx_insn *last = get_last_insn ();
21590
21591 amount = GEN_INT (offsets->saved_args + saved_regs
21592 - offsets->outgoing_args);
21593
21594 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21595 amount));
21596 do
21597 {
21598 last = last ? NEXT_INSN (last) : get_insns ();
21599 RTX_FRAME_RELATED_P (last) = 1;
21600 }
21601 while (last != insn);
21602
21603 /* If the frame pointer is needed, emit a special barrier that
21604 will prevent the scheduler from moving stores to the frame
21605 before the stack adjustment. */
21606 if (frame_pointer_needed)
21607 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21608 hard_frame_pointer_rtx));
21609 }
21610
21611
21612 if (frame_pointer_needed && TARGET_THUMB2)
21613 thumb_set_frame_pointer (offsets);
21614
21615 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21616 {
21617 unsigned long mask;
21618
21619 mask = live_regs_mask;
21620 mask &= THUMB2_WORK_REGS;
21621 if (!IS_NESTED (func_type))
21622 mask |= (1 << IP_REGNUM);
21623 arm_load_pic_register (mask);
21624 }
21625
21626 /* If we are profiling, make sure no instructions are scheduled before
21627 the call to mcount. Similarly if the user has requested no
21628 scheduling in the prolog. Similarly if we want non-call exceptions
21629 using the EABI unwinder, to prevent faulting instructions from being
21630 swapped with a stack adjustment. */
21631 if (crtl->profile || !TARGET_SCHED_PROLOG
21632 || (arm_except_unwind_info (&global_options) == UI_TARGET
21633 && cfun->can_throw_non_call_exceptions))
21634 emit_insn (gen_blockage ());
21635
21636 /* If the link register is being kept alive, with the return address in it,
21637 then make sure that it does not get reused by the ce2 pass. */
21638 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21639 cfun->machine->lr_save_eliminated = 1;
21640 }
21641 \f
21642 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21643 static void
21644 arm_print_condition (FILE *stream)
21645 {
21646 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21647 {
21648 /* Branch conversion is not implemented for Thumb-2. */
21649 if (TARGET_THUMB)
21650 {
21651 output_operand_lossage ("predicated Thumb instruction");
21652 return;
21653 }
21654 if (current_insn_predicate != NULL)
21655 {
21656 output_operand_lossage
21657 ("predicated instruction in conditional sequence");
21658 return;
21659 }
21660
21661 fputs (arm_condition_codes[arm_current_cc], stream);
21662 }
21663 else if (current_insn_predicate)
21664 {
21665 enum arm_cond_code code;
21666
21667 if (TARGET_THUMB1)
21668 {
21669 output_operand_lossage ("predicated Thumb instruction");
21670 return;
21671 }
21672
21673 code = get_arm_condition_code (current_insn_predicate);
21674 fputs (arm_condition_codes[code], stream);
21675 }
21676 }
21677
21678
21679 /* Globally reserved letters: acln
21680 Punctuation letters currently used: @_|?().!#
21681 Lower case letters currently used: bcdefhimpqtvwxyz
21682 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21683 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21684
21685 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21686
21687 If CODE is 'd', then the X is a condition operand and the instruction
21688 should only be executed if the condition is true.
21689 If CODE is 'D', then the X is a condition operand and the instruction
21690 should only be executed if the condition is false: however, if the mode
21691 of the comparison is CCFPEmode, then always execute the instruction -- we
21692 do this because in these circumstances !GE does not necessarily imply LT;
21693 in these cases the instruction pattern will take care to make sure that
21694 an instruction containing %d will follow, thereby undoing the effects of
21695 doing this instruction unconditionally.
21696 If CODE is 'N' then X is a floating point operand that must be negated
21697 before output.
21698 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21699 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
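/* A few concrete examples of the simpler codes, assuming the default
   register names (illustrative only): for X == (const_int 0), '%B' prints
   -1 (the bitwise inverse, sign-extended); for X == (const_int 0x12345678),
   '%L' prints 22136 (the low 16 bits, 0x5678); and for a DImode value held
   in r4, '%M' prints {r4-r5}.  */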
21700 static void
21701 arm_print_operand (FILE *stream, rtx x, int code)
21702 {
21703 switch (code)
21704 {
21705 case '@':
21706 fputs (ASM_COMMENT_START, stream);
21707 return;
21708
21709 case '_':
21710 fputs (user_label_prefix, stream);
21711 return;
21712
21713 case '|':
21714 fputs (REGISTER_PREFIX, stream);
21715 return;
21716
21717 case '?':
21718 arm_print_condition (stream);
21719 return;
21720
21721 case '.':
21722 /* The current condition code for a condition code setting instruction.
21723 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21724 fputc ('s', stream);
21725 arm_print_condition (stream);
21726 return;
21727
21728 case '!':
21729 /* If the instruction is conditionally executed then print
21730 the current condition code, otherwise print 's'. */
21731 gcc_assert (TARGET_THUMB2);
21732 if (current_insn_predicate)
21733 arm_print_condition (stream);
21734 else
21735 fputc ('s', stream);
21736 break;
21737
21738 /* %# is a "break" sequence. It doesn't output anything, but is used to
21739 separate e.g. operand numbers from following text, if that text consists
21740 of further digits which we don't want to be part of the operand
21741 number. */
21742 case '#':
21743 return;
21744
21745 case 'N':
21746 {
21747 REAL_VALUE_TYPE r;
21748 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21749 fprintf (stream, "%s", fp_const_from_val (&r));
21750 }
21751 return;
21752
21753 /* An integer or symbol address without a preceding # sign. */
21754 case 'c':
21755 switch (GET_CODE (x))
21756 {
21757 case CONST_INT:
21758 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21759 break;
21760
21761 case SYMBOL_REF:
21762 output_addr_const (stream, x);
21763 break;
21764
21765 case CONST:
21766 if (GET_CODE (XEXP (x, 0)) == PLUS
21767 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21768 {
21769 output_addr_const (stream, x);
21770 break;
21771 }
21772 /* Fall through. */
21773
21774 default:
21775 output_operand_lossage ("Unsupported operand for code '%c'", code);
21776 }
21777 return;
21778
21779 /* An integer that we want to print in HEX. */
21780 case 'x':
21781 switch (GET_CODE (x))
21782 {
21783 case CONST_INT:
21784 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21785 break;
21786
21787 default:
21788 output_operand_lossage ("Unsupported operand for code '%c'", code);
21789 }
21790 return;
21791
21792 case 'B':
21793 if (CONST_INT_P (x))
21794 {
21795 HOST_WIDE_INT val;
21796 val = ARM_SIGN_EXTEND (~INTVAL (x));
21797 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21798 }
21799 else
21800 {
21801 putc ('~', stream);
21802 output_addr_const (stream, x);
21803 }
21804 return;
21805
21806 case 'b':
21807 /* Print the log2 of a CONST_INT. */
21808 {
21809 HOST_WIDE_INT val;
21810
21811 if (!CONST_INT_P (x)
21812 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21813 output_operand_lossage ("Unsupported operand for code '%c'", code);
21814 else
21815 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21816 }
21817 return;
21818
21819 case 'L':
21820 /* The low 16 bits of an immediate constant. */
21821 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21822 return;
21823
21824 case 'i':
21825 fprintf (stream, "%s", arithmetic_instr (x, 1));
21826 return;
21827
21828 case 'I':
21829 fprintf (stream, "%s", arithmetic_instr (x, 0));
21830 return;
21831
21832 case 'S':
21833 {
21834 HOST_WIDE_INT val;
21835 const char *shift;
21836
21837 shift = shift_op (x, &val);
21838
21839 if (shift)
21840 {
21841 fprintf (stream, ", %s ", shift);
21842 if (val == -1)
21843 arm_print_operand (stream, XEXP (x, 1), 0);
21844 else
21845 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21846 }
21847 }
21848 return;
21849
21850 /* An explanation of the 'Q', 'R' and 'H' register operands:
21851
21852 In a pair of registers containing a DI or DF value the 'Q'
21853 operand returns the register number of the register containing
21854 the least significant part of the value. The 'R' operand returns
21855 the register number of the register containing the most
21856 significant part of the value.
21857
21858 The 'H' operand returns the higher of the two register numbers.
21859 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21860 same as the 'Q' operand, since the most significant part of the
21861 value is held in the lower number register. The reverse is true
21862 on systems where WORDS_BIG_ENDIAN is false.
21863
21864 The purpose of these operands is to distinguish between cases
21865 where the endian-ness of the values is important (for example
21866 when they are added together), and cases where the endian-ness
21867 is irrelevant, but the order of register operations is important.
21868 For example when loading a value from memory into a register
21869 pair, the endian-ness does not matter. Provided that the value
21870 from the lower memory address is put into the lower numbered
21871 register, and the value from the higher address is put into the
21872 higher numbered register, the load will work regardless of whether
21873 the value being loaded is big-wordian or little-wordian. The
21874 order of the two register loads can matter however, if the address
21875 of the memory location is actually held in one of the registers
21876 being overwritten by the load.
21877
21878 The 'Q' and 'R' constraints are also available for 64-bit
21879 constants. */
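    /* For example, for a little-endian DImode value held in {r0, r1},
       '%Q' prints r0 (the least significant word), '%R' prints r1 (the
       most significant word) and '%H' prints r1.  With WORDS_BIG_ENDIAN
       the roles of '%Q' and '%R' swap, while '%H' still prints r1.
       (Illustrative only.)  */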
21880 case 'Q':
21881 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21882 {
21883 rtx part = gen_lowpart (SImode, x);
21884 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21885 return;
21886 }
21887
21888 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21889 {
21890 output_operand_lossage ("invalid operand for code '%c'", code);
21891 return;
21892 }
21893
21894 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21895 return;
21896
21897 case 'R':
21898 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21899 {
21900 machine_mode mode = GET_MODE (x);
21901 rtx part;
21902
21903 if (mode == VOIDmode)
21904 mode = DImode;
21905 part = gen_highpart_mode (SImode, mode, x);
21906 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21907 return;
21908 }
21909
21910 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21911 {
21912 output_operand_lossage ("invalid operand for code '%c'", code);
21913 return;
21914 }
21915
21916 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21917 return;
21918
21919 case 'H':
21920 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21921 {
21922 output_operand_lossage ("invalid operand for code '%c'", code);
21923 return;
21924 }
21925
21926 asm_fprintf (stream, "%r", REGNO (x) + 1);
21927 return;
21928
21929 case 'J':
21930 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21931 {
21932 output_operand_lossage ("invalid operand for code '%c'", code);
21933 return;
21934 }
21935
21936 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21937 return;
21938
21939 case 'K':
21940 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21941 {
21942 output_operand_lossage ("invalid operand for code '%c'", code);
21943 return;
21944 }
21945
21946 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21947 return;
21948
21949 case 'm':
21950 asm_fprintf (stream, "%r",
21951 REG_P (XEXP (x, 0))
21952 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21953 return;
21954
21955 case 'M':
21956 asm_fprintf (stream, "{%r-%r}",
21957 REGNO (x),
21958 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21959 return;
21960
21961 /* Like 'M', but writing doubleword vector registers, for use by Neon
21962 insns. */
21963 case 'h':
21964 {
21965 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21966 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21967 if (numregs == 1)
21968 asm_fprintf (stream, "{d%d}", regno);
21969 else
21970 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21971 }
21972 return;
21973
21974 case 'd':
21975 /* CONST_TRUE_RTX means always -- that's the default. */
21976 if (x == const_true_rtx)
21977 return;
21978
21979 if (!COMPARISON_P (x))
21980 {
21981 output_operand_lossage ("invalid operand for code '%c'", code);
21982 return;
21983 }
21984
21985 fputs (arm_condition_codes[get_arm_condition_code (x)],
21986 stream);
21987 return;
21988
21989 case 'D':
21990 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21991 want to do that. */
21992 if (x == const_true_rtx)
21993 {
21994 output_operand_lossage ("instruction never executed");
21995 return;
21996 }
21997 if (!COMPARISON_P (x))
21998 {
21999 output_operand_lossage ("invalid operand for code '%c'", code);
22000 return;
22001 }
22002
22003 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22004 (get_arm_condition_code (x))],
22005 stream);
22006 return;
22007
22008 case 's':
22009 case 'V':
22010 case 'W':
22011 case 'X':
22012 case 'Y':
22013 case 'Z':
22014 /* Former Maverick support, removed after GCC-4.7. */
22015 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22016 return;
22017
22018 case 'U':
22019 if (!REG_P (x)
22020 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22021 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22022 /* Bad value for wCG register number. */
22023 {
22024 output_operand_lossage ("invalid operand for code '%c'", code);
22025 return;
22026 }
22027
22028 else
22029 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22030 return;
22031
22032 /* Print an iWMMXt control register name. */
22033 case 'w':
22034 if (!CONST_INT_P (x)
22035 || INTVAL (x) < 0
22036 || INTVAL (x) >= 16)
22037 /* Bad value for wC register number. */
22038 {
22039 output_operand_lossage ("invalid operand for code '%c'", code);
22040 return;
22041 }
22042
22043 else
22044 {
22045 static const char * wc_reg_names [16] =
22046 {
22047 "wCID", "wCon", "wCSSF", "wCASF",
22048 "wC4", "wC5", "wC6", "wC7",
22049 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22050 "wC12", "wC13", "wC14", "wC15"
22051 };
22052
22053 fputs (wc_reg_names [INTVAL (x)], stream);
22054 }
22055 return;
22056
22057 /* Print the high single-precision register of a VFP double-precision
22058 register. */
22059 case 'p':
22060 {
22061 machine_mode mode = GET_MODE (x);
22062 int regno;
22063
22064 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22065 {
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22068 }
22069
22070 regno = REGNO (x);
22071 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22072 {
22073 output_operand_lossage ("invalid operand for code '%c'", code);
22074 return;
22075 }
22076
22077 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22078 }
22079 return;
22080
22081 /* Print a VFP/Neon double precision or quad precision register name. */
22082 case 'P':
22083 case 'q':
22084 {
22085 machine_mode mode = GET_MODE (x);
22086 int is_quad = (code == 'q');
22087 int regno;
22088
22089 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22090 {
22091 output_operand_lossage ("invalid operand for code '%c'", code);
22092 return;
22093 }
22094
22095 if (!REG_P (x)
22096 || !IS_VFP_REGNUM (REGNO (x)))
22097 {
22098 output_operand_lossage ("invalid operand for code '%c'", code);
22099 return;
22100 }
22101
22102 regno = REGNO (x);
22103 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22104 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22105 {
22106 output_operand_lossage ("invalid operand for code '%c'", code);
22107 return;
22108 }
22109
22110 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22111 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22112 }
22113 return;
22114
22115 /* These two codes print the low/high doubleword register of a Neon quad
22116 register, respectively. For pair-structure types, can also print
22117 low/high quadword registers. */
22118 case 'e':
22119 case 'f':
22120 {
22121 machine_mode mode = GET_MODE (x);
22122 int regno;
22123
22124 if ((GET_MODE_SIZE (mode) != 16
22125 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22126 {
22127 output_operand_lossage ("invalid operand for code '%c'", code);
22128 return;
22129 }
22130
22131 regno = REGNO (x);
22132 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22133 {
22134 output_operand_lossage ("invalid operand for code '%c'", code);
22135 return;
22136 }
22137
22138 if (GET_MODE_SIZE (mode) == 16)
22139 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22140 + (code == 'f' ? 1 : 0));
22141 else
22142 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22143 + (code == 'f' ? 1 : 0));
22144 }
22145 return;
22146
22147 /* Print a VFPv3 floating-point constant, represented as an integer
22148 index. */
22149 case 'G':
22150 {
22151 int index = vfp3_const_double_index (x);
22152 gcc_assert (index != -1);
22153 fprintf (stream, "%d", index);
22154 }
22155 return;
22156
22157 /* Print bits representing opcode features for Neon.
22158
22159 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22160 and polynomials as unsigned.
22161
22162 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22163
22164 Bit 2 is 1 for rounding functions, 0 otherwise. */
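  /* For example (illustrative): an operand value of 3 (signed | float)
     makes '%T' and '%F' print 'f', a value of 2 (polynomial) makes '%T'
     print 'p' but '%t' print 'u', and setting bit 2 makes '%O' print an
     extra 'r'.  */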
22165
22166 /* Identify the type as 's', 'u', 'p' or 'f'. */
22167 case 'T':
22168 {
22169 HOST_WIDE_INT bits = INTVAL (x);
22170 fputc ("uspf"[bits & 3], stream);
22171 }
22172 return;
22173
22174 /* Likewise, but signed and unsigned integers are both 'i'. */
22175 case 'F':
22176 {
22177 HOST_WIDE_INT bits = INTVAL (x);
22178 fputc ("iipf"[bits & 3], stream);
22179 }
22180 return;
22181
22182 /* As for 'T', but emit 'u' instead of 'p'. */
22183 case 't':
22184 {
22185 HOST_WIDE_INT bits = INTVAL (x);
22186 fputc ("usuf"[bits & 3], stream);
22187 }
22188 return;
22189
22190 /* Bit 2: rounding (vs none). */
22191 case 'O':
22192 {
22193 HOST_WIDE_INT bits = INTVAL (x);
22194 fputs ((bits & 4) != 0 ? "r" : "", stream);
22195 }
22196 return;
22197
22198 /* Memory operand for vld1/vst1 instruction. */
22199 case 'A':
22200 {
22201 rtx addr;
22202 bool postinc = FALSE;
22203 rtx postinc_reg = NULL;
22204 unsigned align, memsize, align_bits;
22205
22206 gcc_assert (MEM_P (x));
22207 addr = XEXP (x, 0);
22208 if (GET_CODE (addr) == POST_INC)
22209 {
22210 postinc = TRUE;
22211 addr = XEXP (addr, 0);
22212 }
22213 if (GET_CODE (addr) == POST_MODIFY)
22214 {
22215 postinc_reg = XEXP (XEXP (addr, 1), 1);
22216 addr = XEXP (addr, 0);
22217 }
22218 asm_fprintf (stream, "[%r", REGNO (addr));
22219
22220 /* We know the alignment of this access, so we can emit a hint in the
22221 instruction (for some alignments) as an aid to the memory subsystem
22222 of the target. */
22223 align = MEM_ALIGN (x) >> 3;
22224 memsize = MEM_SIZE (x);
22225
22226 /* Only certain alignment specifiers are supported by the hardware. */
22227 if (memsize == 32 && (align % 32) == 0)
22228 align_bits = 256;
22229 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22230 align_bits = 128;
22231 else if (memsize >= 8 && (align % 8) == 0)
22232 align_bits = 64;
22233 else
22234 align_bits = 0;
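	/* For example (illustrative): a 16-byte access known to be 16-byte
	   aligned is printed as "[rN:128]", while an 8-byte access with only
	   4-byte alignment gets no alignment hint at all.  */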
22235
22236 if (align_bits != 0)
22237 asm_fprintf (stream, ":%d", align_bits);
22238
22239 asm_fprintf (stream, "]");
22240
22241 if (postinc)
22242 fputs ("!", stream);
22243 if (postinc_reg)
22244 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22245 }
22246 return;
22247
22248 case 'C':
22249 {
22250 rtx addr;
22251
22252 gcc_assert (MEM_P (x));
22253 addr = XEXP (x, 0);
22254 gcc_assert (REG_P (addr));
22255 asm_fprintf (stream, "[%r]", REGNO (addr));
22256 }
22257 return;
22258
22259 /* Translate an S register number into a D register number and element index. */
22260 case 'y':
22261 {
22262 machine_mode mode = GET_MODE (x);
22263 int regno;
22264
22265 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22266 {
22267 output_operand_lossage ("invalid operand for code '%c'", code);
22268 return;
22269 }
22270
22271 regno = REGNO (x);
22272 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22273 {
22274 output_operand_lossage ("invalid operand for code '%c'", code);
22275 return;
22276 }
22277
22278 regno = regno - FIRST_VFP_REGNUM;
22279 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22280 }
22281 return;
22282
22283 case 'v':
22284 gcc_assert (CONST_DOUBLE_P (x));
22285 int result;
22286 result = vfp3_const_double_for_fract_bits (x);
22287 if (result == 0)
22288 result = vfp3_const_double_for_bits (x);
22289 fprintf (stream, "#%d", result);
22290 return;
22291
22292 /* Register specifier for vld1.16/vst1.16. Translate the S register
22293 number into a D register number and element index. */
22294 case 'z':
22295 {
22296 machine_mode mode = GET_MODE (x);
22297 int regno;
22298
22299 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22300 {
22301 output_operand_lossage ("invalid operand for code '%c'", code);
22302 return;
22303 }
22304
22305 regno = REGNO (x);
22306 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22307 {
22308 output_operand_lossage ("invalid operand for code '%c'", code);
22309 return;
22310 }
22311
22312 regno = regno - FIRST_VFP_REGNUM;
22313 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22314 }
22315 return;
22316
22317 default:
22318 if (x == 0)
22319 {
22320 output_operand_lossage ("missing operand");
22321 return;
22322 }
22323
22324 switch (GET_CODE (x))
22325 {
22326 case REG:
22327 asm_fprintf (stream, "%r", REGNO (x));
22328 break;
22329
22330 case MEM:
22331 output_address (GET_MODE (x), XEXP (x, 0));
22332 break;
22333
22334 case CONST_DOUBLE:
22335 {
22336 char fpstr[20];
22337 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22338 sizeof (fpstr), 0, 1);
22339 fprintf (stream, "#%s", fpstr);
22340 }
22341 break;
22342
22343 default:
22344 gcc_assert (GET_CODE (x) != NEG);
22345 fputc ('#', stream);
22346 if (GET_CODE (x) == HIGH)
22347 {
22348 fputs (":lower16:", stream);
22349 x = XEXP (x, 0);
22350 }
22351
22352 output_addr_const (stream, x);
22353 break;
22354 }
22355 }
22356 }
22357 \f
22358 /* Target hook for printing a memory address. */
22359 static void
22360 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22361 {
22362 if (TARGET_32BIT)
22363 {
22364 int is_minus = GET_CODE (x) == MINUS;
22365
22366 if (REG_P (x))
22367 asm_fprintf (stream, "[%r]", REGNO (x));
22368 else if (GET_CODE (x) == PLUS || is_minus)
22369 {
22370 rtx base = XEXP (x, 0);
22371 rtx index = XEXP (x, 1);
22372 HOST_WIDE_INT offset = 0;
22373 if (!REG_P (base)
22374 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22375 {
22376 /* Ensure that BASE is a register. */
22377 /* (one of them must be). */
22378 /* Also ensure the SP is not used as an index register. */
22379 std::swap (base, index);
22380 }
22381 switch (GET_CODE (index))
22382 {
22383 case CONST_INT:
22384 offset = INTVAL (index);
22385 if (is_minus)
22386 offset = -offset;
22387 asm_fprintf (stream, "[%r, #%wd]",
22388 REGNO (base), offset);
22389 break;
22390
22391 case REG:
22392 asm_fprintf (stream, "[%r, %s%r]",
22393 REGNO (base), is_minus ? "-" : "",
22394 REGNO (index));
22395 break;
22396
22397 case MULT:
22398 case ASHIFTRT:
22399 case LSHIFTRT:
22400 case ASHIFT:
22401 case ROTATERT:
22402 {
22403 asm_fprintf (stream, "[%r, %s%r",
22404 REGNO (base), is_minus ? "-" : "",
22405 REGNO (XEXP (index, 0)));
22406 arm_print_operand (stream, index, 'S');
22407 fputs ("]", stream);
22408 break;
22409 }
22410
22411 default:
22412 gcc_unreachable ();
22413 }
22414 }
22415 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22416 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22417 {
22418 gcc_assert (REG_P (XEXP (x, 0)));
22419
22420 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22421 asm_fprintf (stream, "[%r, #%s%d]!",
22422 REGNO (XEXP (x, 0)),
22423 GET_CODE (x) == PRE_DEC ? "-" : "",
22424 GET_MODE_SIZE (mode));
22425 else
22426 asm_fprintf (stream, "[%r], #%s%d",
22427 REGNO (XEXP (x, 0)),
22428 GET_CODE (x) == POST_DEC ? "-" : "",
22429 GET_MODE_SIZE (mode));
22430 }
22431 else if (GET_CODE (x) == PRE_MODIFY)
22432 {
22433 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22434 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22435 asm_fprintf (stream, "#%wd]!",
22436 INTVAL (XEXP (XEXP (x, 1), 1)));
22437 else
22438 asm_fprintf (stream, "%r]!",
22439 REGNO (XEXP (XEXP (x, 1), 1)));
22440 }
22441 else if (GET_CODE (x) == POST_MODIFY)
22442 {
22443 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22444 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22445 asm_fprintf (stream, "#%wd",
22446 INTVAL (XEXP (XEXP (x, 1), 1)));
22447 else
22448 asm_fprintf (stream, "%r",
22449 REGNO (XEXP (XEXP (x, 1), 1)));
22450 }
22451 else output_addr_const (stream, x);
22452 }
22453 else
22454 {
22455 if (REG_P (x))
22456 asm_fprintf (stream, "[%r]", REGNO (x));
22457 else if (GET_CODE (x) == POST_INC)
22458 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22459 else if (GET_CODE (x) == PLUS)
22460 {
22461 gcc_assert (REG_P (XEXP (x, 0)));
22462 if (CONST_INT_P (XEXP (x, 1)))
22463 asm_fprintf (stream, "[%r, #%wd]",
22464 REGNO (XEXP (x, 0)),
22465 INTVAL (XEXP (x, 1)));
22466 else
22467 asm_fprintf (stream, "[%r, %r]",
22468 REGNO (XEXP (x, 0)),
22469 REGNO (XEXP (x, 1)));
22470 }
22471 else
22472 output_addr_const (stream, x);
22473 }
22474 }
22475 \f
22476 /* Target hook for indicating whether a punctuation character for
22477 TARGET_PRINT_OPERAND is valid. */
22478 static bool
22479 arm_print_operand_punct_valid_p (unsigned char code)
22480 {
22481 return (code == '@' || code == '|' || code == '.'
22482 || code == '(' || code == ')' || code == '#'
22483 || (TARGET_32BIT && (code == '?'))
22484 || (TARGET_THUMB2 && (code == '!'))
22485 || (TARGET_THUMB && (code == '_')));
22486 }
22487 \f
22488 /* Target hook for assembling integer objects. The ARM version needs to
22489 handle word-sized values specially. */
22490 static bool
22491 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22492 {
22493 machine_mode mode;
22494
22495 if (size == UNITS_PER_WORD && aligned_p)
22496 {
22497 fputs ("\t.word\t", asm_out_file);
22498 output_addr_const (asm_out_file, x);
22499
22500 /* Mark symbols as position independent. We only do this in the
22501 .text segment, not in the .data segment. */
22502 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22503 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22504 {
22505 /* See legitimize_pic_address for an explanation of the
22506 TARGET_VXWORKS_RTP check. */
22507 if (!arm_pic_data_is_text_relative
22508 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22509 fputs ("(GOT)", asm_out_file);
22510 else
22511 fputs ("(GOTOFF)", asm_out_file);
22512 }
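	  /* So a local symbol in a text-relative PIC constant table typically
	     assembles as "\t.word\tsym(GOTOFF)", while a non-local one gets
	     "\t.word\tsym(GOT)" (illustrative).  */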
22513 fputc ('\n', asm_out_file);
22514 return true;
22515 }
22516
22517 mode = GET_MODE (x);
22518
22519 if (arm_vector_mode_supported_p (mode))
22520 {
22521 int i, units;
22522
22523 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22524
22525 units = CONST_VECTOR_NUNITS (x);
22526 size = GET_MODE_UNIT_SIZE (mode);
22527
22528 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22529 for (i = 0; i < units; i++)
22530 {
22531 rtx elt = CONST_VECTOR_ELT (x, i);
22532 assemble_integer
22533 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22534 }
22535 else
22536 for (i = 0; i < units; i++)
22537 {
22538 rtx elt = CONST_VECTOR_ELT (x, i);
22539 assemble_real
22540 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22541 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22542 }
22543
22544 return true;
22545 }
22546
22547 return default_assemble_integer (x, size, aligned_p);
22548 }
22549
22550 static void
22551 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22552 {
22553 section *s;
22554
22555 if (!TARGET_AAPCS_BASED)
22556 {
22557 (is_ctor ?
22558 default_named_section_asm_out_constructor
22559 : default_named_section_asm_out_destructor) (symbol, priority);
22560 return;
22561 }
22562
22563 /* Put these in the .init_array section, using a special relocation. */
22564 if (priority != DEFAULT_INIT_PRIORITY)
22565 {
22566 char buf[18];
22567 sprintf (buf, "%s.%.5u",
22568 is_ctor ? ".init_array" : ".fini_array",
22569 priority);
22570 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22571 }
22572 else if (is_ctor)
22573 s = ctors_section;
22574 else
22575 s = dtors_section;
22576
22577 switch_to_section (s);
22578 assemble_align (POINTER_SIZE);
22579 fputs ("\t.word\t", asm_out_file);
22580 output_addr_const (asm_out_file, symbol);
22581 fputs ("(target1)\n", asm_out_file);
22582 }
22583
22584 /* Add a function to the list of static constructors. */
22585
22586 static void
22587 arm_elf_asm_constructor (rtx symbol, int priority)
22588 {
22589 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22590 }
22591
22592 /* Add a function to the list of static destructors. */
22593
22594 static void
22595 arm_elf_asm_destructor (rtx symbol, int priority)
22596 {
22597 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22598 }
22599 \f
22600 /* A finite state machine takes care of noticing whether or not instructions
22601 can be conditionally executed, and thus decrease execution time and code
22602 size by deleting branch instructions. The fsm is controlled by
22603 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22604
22605 /* The states of the fsm controlling condition codes are:
22606 0: normal, do nothing special
22607 1: make ASM_OUTPUT_OPCODE not output this instruction
22608 2: make ASM_OUTPUT_OPCODE not output this instruction
22609 3: make instructions conditional
22610 4: make instructions conditional
22611
22612 State transitions (state->state by whom under condition):
22613 0 -> 1 final_prescan_insn if the `target' is a label
22614 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22615 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22616 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22617 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22618 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22619 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22620 (the target insn is arm_target_insn).
22621
22622 If the jump clobbers the conditions then we use states 2 and 4.
22623
22624 A similar thing can be done with conditional return insns.
22625
22626 XXX In case the `target' is an unconditional branch, this conditionalising
22627 of the instructions always reduces code size, but not always execution
22628 time. But then, I want to reduce the code size to somewhere near what
22629 /bin/cc produces. */
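/* As a concrete example of the transformation (illustrative): a sequence
   such as

	beq	.L1
	mov	r0, #1
   .L1:

   can instead be emitted as the single conditional instruction

	movne	r0, #1

   because the branch is only taken when the skipped instruction must not
   execute.  */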
22630
22631 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22632 instructions. When a COND_EXEC instruction is seen the subsequent
22633 instructions are scanned so that multiple conditional instructions can be
22634 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22635 specify the length and true/false mask for the IT block. These will be
22636 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
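/* For example (illustrative, assuming each insn expands to a single machine
   instruction, i.e. a ce_count of 1): two COND_EXEC insns predicated on EQ
   followed by one predicated on NE are emitted under a single "itte eq"
   block, with arm_condexec_mask == 0b011 and arm_condexec_masklen == 3.  */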
22637
22638 /* Returns the index of the ARM condition code string in
22639 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22640 COMPARISON should be an rtx like `(eq (...) (...))'. */
22641
22642 enum arm_cond_code
22643 maybe_get_arm_condition_code (rtx comparison)
22644 {
22645 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22646 enum arm_cond_code code;
22647 enum rtx_code comp_code = GET_CODE (comparison);
22648
22649 if (GET_MODE_CLASS (mode) != MODE_CC)
22650 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22651 XEXP (comparison, 1));
22652
22653 switch (mode)
22654 {
22655 case CC_DNEmode: code = ARM_NE; goto dominance;
22656 case CC_DEQmode: code = ARM_EQ; goto dominance;
22657 case CC_DGEmode: code = ARM_GE; goto dominance;
22658 case CC_DGTmode: code = ARM_GT; goto dominance;
22659 case CC_DLEmode: code = ARM_LE; goto dominance;
22660 case CC_DLTmode: code = ARM_LT; goto dominance;
22661 case CC_DGEUmode: code = ARM_CS; goto dominance;
22662 case CC_DGTUmode: code = ARM_HI; goto dominance;
22663 case CC_DLEUmode: code = ARM_LS; goto dominance;
22664 case CC_DLTUmode: code = ARM_CC;
22665
22666 dominance:
22667 if (comp_code == EQ)
22668 return ARM_INVERSE_CONDITION_CODE (code);
22669 if (comp_code == NE)
22670 return code;
22671 return ARM_NV;
22672
22673 case CC_NOOVmode:
22674 switch (comp_code)
22675 {
22676 case NE: return ARM_NE;
22677 case EQ: return ARM_EQ;
22678 case GE: return ARM_PL;
22679 case LT: return ARM_MI;
22680 default: return ARM_NV;
22681 }
22682
22683 case CC_Zmode:
22684 switch (comp_code)
22685 {
22686 case NE: return ARM_NE;
22687 case EQ: return ARM_EQ;
22688 default: return ARM_NV;
22689 }
22690
22691 case CC_Nmode:
22692 switch (comp_code)
22693 {
22694 case NE: return ARM_MI;
22695 case EQ: return ARM_PL;
22696 default: return ARM_NV;
22697 }
22698
22699 case CCFPEmode:
22700 case CCFPmode:
22701 /* We can handle all cases except UNEQ and LTGT. */
22702 switch (comp_code)
22703 {
22704 case GE: return ARM_GE;
22705 case GT: return ARM_GT;
22706 case LE: return ARM_LS;
22707 case LT: return ARM_MI;
22708 case NE: return ARM_NE;
22709 case EQ: return ARM_EQ;
22710 case ORDERED: return ARM_VC;
22711 case UNORDERED: return ARM_VS;
22712 case UNLT: return ARM_LT;
22713 case UNLE: return ARM_LE;
22714 case UNGT: return ARM_HI;
22715 case UNGE: return ARM_PL;
22716 /* UNEQ and LTGT do not have a representation. */
22717 case UNEQ: /* Fall through. */
22718 case LTGT: /* Fall through. */
22719 default: return ARM_NV;
22720 }
22721
22722 case CC_SWPmode:
22723 switch (comp_code)
22724 {
22725 case NE: return ARM_NE;
22726 case EQ: return ARM_EQ;
22727 case GE: return ARM_LE;
22728 case GT: return ARM_LT;
22729 case LE: return ARM_GE;
22730 case LT: return ARM_GT;
22731 case GEU: return ARM_LS;
22732 case GTU: return ARM_CC;
22733 case LEU: return ARM_CS;
22734 case LTU: return ARM_HI;
22735 default: return ARM_NV;
22736 }
22737
22738 case CC_Cmode:
22739 switch (comp_code)
22740 {
22741 case LTU: return ARM_CS;
22742 case GEU: return ARM_CC;
22743 case NE: return ARM_CS;
22744 case EQ: return ARM_CC;
22745 default: return ARM_NV;
22746 }
22747
22748 case CC_CZmode:
22749 switch (comp_code)
22750 {
22751 case NE: return ARM_NE;
22752 case EQ: return ARM_EQ;
22753 case GEU: return ARM_CS;
22754 case GTU: return ARM_HI;
22755 case LEU: return ARM_LS;
22756 case LTU: return ARM_CC;
22757 default: return ARM_NV;
22758 }
22759
22760 case CC_NCVmode:
22761 switch (comp_code)
22762 {
22763 case GE: return ARM_GE;
22764 case LT: return ARM_LT;
22765 case GEU: return ARM_CS;
22766 case LTU: return ARM_CC;
22767 default: return ARM_NV;
22768 }
22769
22770 case CC_Vmode:
22771 switch (comp_code)
22772 {
22773 case NE: return ARM_VS;
22774 case EQ: return ARM_VC;
22775 default: return ARM_NV;
22776 }
22777
22778 case CCmode:
22779 switch (comp_code)
22780 {
22781 case NE: return ARM_NE;
22782 case EQ: return ARM_EQ;
22783 case GE: return ARM_GE;
22784 case GT: return ARM_GT;
22785 case LE: return ARM_LE;
22786 case LT: return ARM_LT;
22787 case GEU: return ARM_CS;
22788 case GTU: return ARM_HI;
22789 case LEU: return ARM_LS;
22790 case LTU: return ARM_CC;
22791 default: return ARM_NV;
22792 }
22793
22794 default: gcc_unreachable ();
22795 }
22796 }
22797
22798 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22799 static enum arm_cond_code
22800 get_arm_condition_code (rtx comparison)
22801 {
22802 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22803 gcc_assert (code != ARM_NV);
22804 return code;
22805 }
22806
22807 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22808 instructions. */
22809 void
22810 thumb2_final_prescan_insn (rtx_insn *insn)
22811 {
22812 rtx_insn *first_insn = insn;
22813 rtx body = PATTERN (insn);
22814 rtx predicate;
22815 enum arm_cond_code code;
22816 int n;
22817 int mask;
22818 int max;
22819
22820 /* max_insns_skipped in the tune was already taken into account in the
22821 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22822 just emit the IT blocks as best we can; it does not make sense to split
22823 the IT blocks. */
22824 max = MAX_INSN_PER_IT_BLOCK;
22825
22826 /* Remove the previous insn from the count of insns to be output. */
22827 if (arm_condexec_count)
22828 arm_condexec_count--;
22829
22830 /* Nothing to do if we are already inside a conditional block. */
22831 if (arm_condexec_count)
22832 return;
22833
22834 if (GET_CODE (body) != COND_EXEC)
22835 return;
22836
22837 /* Conditional jumps are implemented directly. */
22838 if (JUMP_P (insn))
22839 return;
22840
22841 predicate = COND_EXEC_TEST (body);
22842 arm_current_cc = get_arm_condition_code (predicate);
22843
22844 n = get_attr_ce_count (insn);
22845 arm_condexec_count = 1;
22846 arm_condexec_mask = (1 << n) - 1;
22847 arm_condexec_masklen = n;
22848 /* See if subsequent instructions can be combined into the same block. */
22849 for (;;)
22850 {
22851 insn = next_nonnote_insn (insn);
22852
22853 /* Jumping into the middle of an IT block is illegal, so a label or
22854 barrier terminates the block. */
22855 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22856 break;
22857
22858 body = PATTERN (insn);
22859 /* USE and CLOBBER aren't really insns, so just skip them. */
22860 if (GET_CODE (body) == USE
22861 || GET_CODE (body) == CLOBBER)
22862 continue;
22863
22864 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22865 if (GET_CODE (body) != COND_EXEC)
22866 break;
22867 /* Maximum number of conditionally executed instructions in a block. */
22868 n = get_attr_ce_count (insn);
22869 if (arm_condexec_masklen + n > max)
22870 break;
22871
22872 predicate = COND_EXEC_TEST (body);
22873 code = get_arm_condition_code (predicate);
22874 mask = (1 << n) - 1;
22875 if (arm_current_cc == code)
22876 arm_condexec_mask |= (mask << arm_condexec_masklen);
22877 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22878 break;
22879
22880 arm_condexec_count++;
22881 arm_condexec_masklen += n;
22882
22883 /* A jump must be the last instruction in a conditional block. */
22884 if (JUMP_P (insn))
22885 break;
22886 }
22887 /* Restore recog_data (getting the attributes of other insns can
22888 destroy this array, but final.c assumes that it remains intact
22889 across this call). */
22890 extract_constrain_insn_cached (first_insn);
22891 }
22892
22893 void
22894 arm_final_prescan_insn (rtx_insn *insn)
22895 {
22896 /* BODY will hold the body of INSN. */
22897 rtx body = PATTERN (insn);
22898
22899 /* This will be 1 if trying to repeat the trick, and things need to be
22900 reversed if it appears to fail. */
22901 int reverse = 0;
22902
22903 /* If we start with a return insn, we only succeed if we find another one. */
22904 int seeking_return = 0;
22905 enum rtx_code return_code = UNKNOWN;
22906
22907 /* START_INSN will hold the insn from where we start looking. This is the
22908 first insn after the following code_label if REVERSE is true. */
22909 rtx_insn *start_insn = insn;
22910
22911 /* If in state 4, check if the target branch is reached, in order to
22912 change back to state 0. */
22913 if (arm_ccfsm_state == 4)
22914 {
22915 if (insn == arm_target_insn)
22916 {
22917 arm_target_insn = NULL;
22918 arm_ccfsm_state = 0;
22919 }
22920 return;
22921 }
22922
22923 /* If in state 3, it is possible to repeat the trick, if this insn is an
22924 unconditional branch to a label, and immediately following this branch
22925 is the previous target label which is only used once, and the label this
22926 branch jumps to is not too far off. */
22927 if (arm_ccfsm_state == 3)
22928 {
22929 if (simplejump_p (insn))
22930 {
22931 start_insn = next_nonnote_insn (start_insn);
22932 if (BARRIER_P (start_insn))
22933 {
22934 /* XXX Isn't this always a barrier? */
22935 start_insn = next_nonnote_insn (start_insn);
22936 }
22937 if (LABEL_P (start_insn)
22938 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22939 && LABEL_NUSES (start_insn) == 1)
22940 reverse = TRUE;
22941 else
22942 return;
22943 }
22944 else if (ANY_RETURN_P (body))
22945 {
22946 start_insn = next_nonnote_insn (start_insn);
22947 if (BARRIER_P (start_insn))
22948 start_insn = next_nonnote_insn (start_insn);
22949 if (LABEL_P (start_insn)
22950 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22951 && LABEL_NUSES (start_insn) == 1)
22952 {
22953 reverse = TRUE;
22954 seeking_return = 1;
22955 return_code = GET_CODE (body);
22956 }
22957 else
22958 return;
22959 }
22960 else
22961 return;
22962 }
22963
22964 gcc_assert (!arm_ccfsm_state || reverse);
22965 if (!JUMP_P (insn))
22966 return;
22967
22968 /* This jump might be paralleled with a clobber of the condition codes;
22969 the jump should always come first. */
22970 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22971 body = XVECEXP (body, 0, 0);
22972
22973 if (reverse
22974 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22975 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22976 {
22977 int insns_skipped;
22978 int fail = FALSE, succeed = FALSE;
22979 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22980 int then_not_else = TRUE;
22981 rtx_insn *this_insn = start_insn;
22982 rtx label = 0;
22983
22984 /* Register the insn jumped to. */
22985 if (reverse)
22986 {
22987 if (!seeking_return)
22988 label = XEXP (SET_SRC (body), 0);
22989 }
22990 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22991 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22992 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22993 {
22994 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22995 then_not_else = FALSE;
22996 }
22997 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22998 {
22999 seeking_return = 1;
23000 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23001 }
23002 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23003 {
23004 seeking_return = 1;
23005 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23006 then_not_else = FALSE;
23007 }
23008 else
23009 gcc_unreachable ();
23010
23011 /* See how many insns this branch skips, and what kind of insns. If all
23012 insns are okay, and the label or unconditional branch to the same
23013 label is not too far away, succeed. */
23014 for (insns_skipped = 0;
23015 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23016 {
23017 rtx scanbody;
23018
23019 this_insn = next_nonnote_insn (this_insn);
23020 if (!this_insn)
23021 break;
23022
23023 switch (GET_CODE (this_insn))
23024 {
23025 case CODE_LABEL:
23026 /* Succeed if it is the target label, otherwise fail since
23027 control falls in from somewhere else. */
23028 if (this_insn == label)
23029 {
23030 arm_ccfsm_state = 1;
23031 succeed = TRUE;
23032 }
23033 else
23034 fail = TRUE;
23035 break;
23036
23037 case BARRIER:
23038 /* Succeed if the following insn is the target label.
23039 Otherwise fail.
23040 If return insns are used then the last insn in a function
23041 will be a barrier. */
23042 this_insn = next_nonnote_insn (this_insn);
23043 if (this_insn && this_insn == label)
23044 {
23045 arm_ccfsm_state = 1;
23046 succeed = TRUE;
23047 }
23048 else
23049 fail = TRUE;
23050 break;
23051
23052 case CALL_INSN:
23053 /* The AAPCS says that conditional calls should not be
23054 used since they make interworking inefficient (the
23055 linker can't transform BL<cond> into BLX). That's
23056 only a problem if the machine has BLX. */
23057 if (arm_arch5)
23058 {
23059 fail = TRUE;
23060 break;
23061 }
23062
23063 /* Succeed if the following insn is the target label, or
23064 if the following two insns are a barrier and the
23065 target label. */
23066 this_insn = next_nonnote_insn (this_insn);
23067 if (this_insn && BARRIER_P (this_insn))
23068 this_insn = next_nonnote_insn (this_insn);
23069
23070 if (this_insn && this_insn == label
23071 && insns_skipped < max_insns_skipped)
23072 {
23073 arm_ccfsm_state = 1;
23074 succeed = TRUE;
23075 }
23076 else
23077 fail = TRUE;
23078 break;
23079
23080 case JUMP_INSN:
23081 /* If this is an unconditional branch to the same label, succeed.
23082 If it is to another label, do nothing. If it is conditional,
23083 fail. */
23084 /* XXX Probably, the tests for SET and the PC are
23085 unnecessary. */
23086
23087 scanbody = PATTERN (this_insn);
23088 if (GET_CODE (scanbody) == SET
23089 && GET_CODE (SET_DEST (scanbody)) == PC)
23090 {
23091 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23092 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23093 {
23094 arm_ccfsm_state = 2;
23095 succeed = TRUE;
23096 }
23097 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23098 fail = TRUE;
23099 }
23100 /* Fail if a conditional return is undesirable (e.g. on a
23101 StrongARM), but still allow this if optimizing for size. */
23102 else if (GET_CODE (scanbody) == return_code
23103 && !use_return_insn (TRUE, NULL)
23104 && !optimize_size)
23105 fail = TRUE;
23106 else if (GET_CODE (scanbody) == return_code)
23107 {
23108 arm_ccfsm_state = 2;
23109 succeed = TRUE;
23110 }
23111 else if (GET_CODE (scanbody) == PARALLEL)
23112 {
23113 switch (get_attr_conds (this_insn))
23114 {
23115 case CONDS_NOCOND:
23116 break;
23117 default:
23118 fail = TRUE;
23119 break;
23120 }
23121 }
23122 else
23123 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23124
23125 break;
23126
23127 case INSN:
23128 /* Instructions using or affecting the condition codes make it
23129 fail. */
23130 scanbody = PATTERN (this_insn);
23131 if (!(GET_CODE (scanbody) == SET
23132 || GET_CODE (scanbody) == PARALLEL)
23133 || get_attr_conds (this_insn) != CONDS_NOCOND)
23134 fail = TRUE;
23135 break;
23136
23137 default:
23138 break;
23139 }
23140 }
23141 if (succeed)
23142 {
23143 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23144 arm_target_label = CODE_LABEL_NUMBER (label);
23145 else
23146 {
23147 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23148
23149 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23150 {
23151 this_insn = next_nonnote_insn (this_insn);
23152 gcc_assert (!this_insn
23153 || (!BARRIER_P (this_insn)
23154 && !LABEL_P (this_insn)));
23155 }
23156 if (!this_insn)
23157 {
23158 /* Oh, dear! We ran off the end; give up. */
23159 extract_constrain_insn_cached (insn);
23160 arm_ccfsm_state = 0;
23161 arm_target_insn = NULL;
23162 return;
23163 }
23164 arm_target_insn = this_insn;
23165 }
23166
23167 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23168 what it was. */
23169 if (!reverse)
23170 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23171
23172 if (reverse || then_not_else)
23173 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23174 }
23175
23176 /* Restore recog_data (getting the attributes of other insns can
23177 destroy this array, but final.c assumes that it remains intact
23178 across this call). */
23179 extract_constrain_insn_cached (insn);
23180 }
23181 }
23182
23183 /* Output IT instructions. */
23184 void
23185 thumb2_asm_output_opcode (FILE * stream)
23186 {
23187 char buff[5];
23188 int n;
23189
23190 if (arm_condexec_mask)
23191 {
23192 for (n = 0; n < arm_condexec_masklen; n++)
23193 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23194 buff[n] = 0;
23195 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23196 arm_condition_codes[arm_current_cc]);
23197 arm_condexec_mask = 0;
23198 }
23199 }
23200
23201 /* Returns true if REGNO is a valid register
23202 for holding a quantity of type MODE. */
23203 int
23204 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23205 {
23206 if (GET_MODE_CLASS (mode) == MODE_CC)
23207 return (regno == CC_REGNUM
23208 || (TARGET_HARD_FLOAT
23209 && regno == VFPCC_REGNUM));
23210
23211 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23212 return false;
23213
23214 if (TARGET_THUMB1)
23215 /* For the Thumb we only allow values bigger than SImode in
23216 registers 0 - 6, so that there is always a second low
23217 register available to hold the upper part of the value.
23218 We probably ought to ensure that the register is the
23219 start of an even numbered register pair. */
23220 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23221
23222 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23223 {
23224 if (mode == SFmode || mode == SImode)
23225 return VFP_REGNO_OK_FOR_SINGLE (regno);
23226
23227 if (mode == DFmode)
23228 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23229
23230 if (mode == HFmode)
23231 return VFP_REGNO_OK_FOR_SINGLE (regno);
23232
23233 /* VFP registers can hold HImode values. */
23234 if (mode == HImode)
23235 return VFP_REGNO_OK_FOR_SINGLE (regno);
23236
23237 if (TARGET_NEON)
23238 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23239 || (VALID_NEON_QREG_MODE (mode)
23240 && NEON_REGNO_OK_FOR_QUAD (regno))
23241 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23242 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23243 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23244 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23245 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23246
23247 return FALSE;
23248 }
23249
23250 if (TARGET_REALLY_IWMMXT)
23251 {
23252 if (IS_IWMMXT_GR_REGNUM (regno))
23253 return mode == SImode;
23254
23255 if (IS_IWMMXT_REGNUM (regno))
23256 return VALID_IWMMXT_REG_MODE (mode);
23257 }
23258
23259 /* We allow almost any value to be stored in the general registers.
23260 Restrict doubleword quantities to even register pairs in ARM state
23261 so that we can use ldrd. Do not allow very large Neon structure
23262 opaque modes in general registers; they would use too many. */
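  /* For example (illustrative): with LDRD available in ARM state a DImode
     value may live in {r0, r1} or {r2, r3} but not in {r1, r2}, so that the
     ldrd/strd even-register-pair restriction can always be met.  */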
23263 if (regno <= LAST_ARM_REGNUM)
23264 {
23265 if (ARM_NUM_REGS (mode) > 4)
23266 return FALSE;
23267
23268 if (TARGET_THUMB2)
23269 return TRUE;
23270
23271 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23272 }
23273
23274 if (regno == FRAME_POINTER_REGNUM
23275 || regno == ARG_POINTER_REGNUM)
23276 /* We only allow integers in the fake hard registers. */
23277 return GET_MODE_CLASS (mode) == MODE_INT;
23278
23279 return FALSE;
23280 }
23281
23282 /* Implement MODES_TIEABLE_P. */
23283
23284 bool
23285 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23286 {
23287 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23288 return true;
23289
23290 /* We specifically want to allow elements of "structure" modes to
23291 be tieable to the structure. This more general condition allows
23292 other rarer situations too. */
23293 if (TARGET_NEON
23294 && (VALID_NEON_DREG_MODE (mode1)
23295 || VALID_NEON_QREG_MODE (mode1)
23296 || VALID_NEON_STRUCT_MODE (mode1))
23297 && (VALID_NEON_DREG_MODE (mode2)
23298 || VALID_NEON_QREG_MODE (mode2)
23299 || VALID_NEON_STRUCT_MODE (mode2)))
23300 return true;
23301
23302 return false;
23303 }
23304
23305 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23306 not used in arm mode. */
23307
23308 enum reg_class
23309 arm_regno_class (int regno)
23310 {
23311 if (regno == PC_REGNUM)
23312 return NO_REGS;
23313
23314 if (TARGET_THUMB1)
23315 {
23316 if (regno == STACK_POINTER_REGNUM)
23317 return STACK_REG;
23318 if (regno == CC_REGNUM)
23319 return CC_REG;
23320 if (regno < 8)
23321 return LO_REGS;
23322 return HI_REGS;
23323 }
23324
23325 if (TARGET_THUMB2 && regno < 8)
23326 return LO_REGS;
23327
23328 if ( regno <= LAST_ARM_REGNUM
23329 || regno == FRAME_POINTER_REGNUM
23330 || regno == ARG_POINTER_REGNUM)
23331 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23332
23333 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23334 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23335
23336 if (IS_VFP_REGNUM (regno))
23337 {
23338 if (regno <= D7_VFP_REGNUM)
23339 return VFP_D0_D7_REGS;
23340 else if (regno <= LAST_LO_VFP_REGNUM)
23341 return VFP_LO_REGS;
23342 else
23343 return VFP_HI_REGS;
23344 }
23345
23346 if (IS_IWMMXT_REGNUM (regno))
23347 return IWMMXT_REGS;
23348
23349 if (IS_IWMMXT_GR_REGNUM (regno))
23350 return IWMMXT_GR_REGS;
23351
23352 return NO_REGS;
23353 }
23354
23355 /* Handle a special case when computing the offset
23356 of an argument from the frame pointer. */
23357 int
23358 arm_debugger_arg_offset (int value, rtx addr)
23359 {
23360 rtx_insn *insn;
23361
23362 /* We are only interested if dbxout_parms() failed to compute the offset. */
23363 if (value != 0)
23364 return 0;
23365
23366 /* We can only cope with the case where the address is held in a register. */
23367 if (!REG_P (addr))
23368 return 0;
23369
23370 /* If we are using the frame pointer to point at the argument, then
23371 an offset of 0 is correct. */
23372 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23373 return 0;
23374
23375 /* If we are using the stack pointer to point at the
23376 argument, then an offset of 0 is correct. */
23377 /* ??? Check this is consistent with thumb2 frame layout. */
23378 if ((TARGET_THUMB || !frame_pointer_needed)
23379 && REGNO (addr) == SP_REGNUM)
23380 return 0;
23381
23382 /* Oh dear. The argument is pointed to by a register rather
23383 than being held in a register, or being stored at a known
23384 offset from the frame pointer. Since GDB only understands
23385 those two kinds of argument we must translate the address
23386 held in the register into an offset from the frame pointer.
23387 We do this by searching through the insns for the function
23388 looking to see where this register gets its value. If the
23389 register is initialized from the frame pointer plus an offset
23390 then we are in luck and we can continue, otherwise we give up.
23391
23392 This code is exercised by producing debugging information
23393 for a function with arguments like this:
23394
23395 double func (double a, double b, int c, double d) {return d;}
23396
23397 Without this code the stab for parameter 'd' will be set to
23398 an offset of 0 from the frame pointer, rather than 8. */
23399
23400 /* The if() statement says:
23401
23402 If the insn is a normal instruction
23403 and if the insn is setting the value in a register
23404 and if the register being set is the register holding the address of the argument
23405 and if the address is computed by an addition
23406 that involves adding to a register
23407 which is the frame pointer
23408 a constant integer
23409
23410 then... */
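  /* So, for instance (illustrative), an insn of the form
	(set (reg Rn) (plus (reg fp) (const_int 8)))
     makes the loop below return 8 for an argument whose address is held
     in Rn.  */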
23411
23412 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23413 {
23414 if ( NONJUMP_INSN_P (insn)
23415 && GET_CODE (PATTERN (insn)) == SET
23416 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23417 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23418 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23419 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23420 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23421 )
23422 {
23423 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23424
23425 break;
23426 }
23427 }
23428
23429 if (value == 0)
23430 {
23431 debug_rtx (addr);
23432 warning (0, "unable to compute real location of stacked parameter");
23433 value = 8; /* XXX magic hack */
23434 }
23435
23436 return value;
23437 }
23438 \f
23439 /* Implement TARGET_PROMOTED_TYPE. */
23440
23441 static tree
23442 arm_promoted_type (const_tree t)
23443 {
23444 if (SCALAR_FLOAT_TYPE_P (t)
23445 && TYPE_PRECISION (t) == 16
23446 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23447 return float_type_node;
23448 return NULL_TREE;
23449 }
23450
23451 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23452 This simply adds HFmode as a supported mode; even though we don't
23453 implement arithmetic on this type directly, it's supported by
23454 optabs conversions, much the way the double-word arithmetic is
23455 special-cased in the default hook. */
23456
23457 static bool
23458 arm_scalar_mode_supported_p (machine_mode mode)
23459 {
23460 if (mode == HFmode)
23461 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23462 else if (ALL_FIXED_POINT_MODE_P (mode))
23463 return true;
23464 else
23465 return default_scalar_mode_supported_p (mode);
23466 }
23467
23468 /* Set the value of FLT_EVAL_METHOD.
23469 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23470
23471 0: evaluate all operations and constants, whose semantic type has at
23472 most the range and precision of type float, to the range and
23473 precision of float; evaluate all other operations and constants to
23474 the range and precision of the semantic type;
23475
23476 N, where _FloatN is a supported interchange floating type:
23477 evaluate all operations and constants, whose semantic type has at
23478 most the range and precision of _FloatN type, to the range and
23479 precision of the _FloatN type; evaluate all other operations and
23480 constants to the range and precision of the semantic type;
23481
23482 If we have the ARMv8.2-A extensions then we support _Float16 in native
23483 precision, so we should set this to 16. Otherwise, we support the type,
23484 but want to evaluate expressions in float precision, so set this to
23485 0. */
23486
23487 static enum flt_eval_method
23488 arm_excess_precision (enum excess_precision_type type)
23489 {
23490 switch (type)
23491 {
23492 case EXCESS_PRECISION_TYPE_FAST:
23493 case EXCESS_PRECISION_TYPE_STANDARD:
23494 /* We can calculate either in 16-bit range and precision or
23495 32-bit range and precision. Make that decision based on whether
23496 we have native support for the ARMv8.2-A 16-bit floating-point
23497 instructions or not. */
23498 return (TARGET_VFP_FP16INST
23499 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23500 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23501 case EXCESS_PRECISION_TYPE_IMPLICIT:
23502 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23503 default:
23504 gcc_unreachable ();
23505 }
23506 return FLT_EVAL_METHOD_UNPREDICTABLE;
23507 }
23508
23509
23510 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23511 _Float16 if we are using anything other than ieee format for 16-bit
23512 floating point. Otherwise, punt to the default implementation. */
23513 static machine_mode
23514 arm_floatn_mode (int n, bool extended)
23515 {
23516 if (!extended && n == 16)
23517 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23518
23519 return default_floatn_mode (n, extended);
23520 }
23521
23522
23523 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23524 not to early-clobber SRC registers in the process.
23525
23526 We assume that the operands described by SRC and DEST represent a
23527 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23528 number of components into which the copy has been decomposed. */
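 /* For example, with COUNT == 2, if the destination overlaps the source and
 starts at a higher register number, the component copies are emitted in
 reverse order so that no source register is clobbered before it is read. */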
23529 void
23530 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23531 {
23532 unsigned int i;
23533
23534 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23535 || REGNO (operands[0]) < REGNO (operands[1]))
23536 {
23537 for (i = 0; i < count; i++)
23538 {
23539 operands[2 * i] = dest[i];
23540 operands[2 * i + 1] = src[i];
23541 }
23542 }
23543 else
23544 {
23545 for (i = 0; i < count; i++)
23546 {
23547 operands[2 * i] = dest[count - i - 1];
23548 operands[2 * i + 1] = src[count - i - 1];
23549 }
23550 }
23551 }
23552
23553 /* Split operands into moves from op[1] + op[2] into op[0]. */
23554
23555 void
23556 neon_split_vcombine (rtx operands[3])
23557 {
23558 unsigned int dest = REGNO (operands[0]);
23559 unsigned int src1 = REGNO (operands[1]);
23560 unsigned int src2 = REGNO (operands[2]);
23561 machine_mode halfmode = GET_MODE (operands[1]);
23562 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23563 rtx destlo, desthi;
23564
23565 if (src1 == dest && src2 == dest + halfregs)
23566 {
23567 /* No-op move. Can't split to nothing; emit something. */
23568 emit_note (NOTE_INSN_DELETED);
23569 return;
23570 }
23571
23572 /* Preserve register attributes for variable tracking. */
23573 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23574 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23575 GET_MODE_SIZE (halfmode));
23576
23577 /* Special case of reversed high/low parts. Use VSWP. */
23578 if (src2 == dest && src1 == dest + halfregs)
23579 {
23580 rtx x = gen_rtx_SET (destlo, operands[1]);
23581 rtx y = gen_rtx_SET (desthi, operands[2]);
23582 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23583 return;
23584 }
23585
23586 if (!reg_overlap_mentioned_p (operands[2], destlo))
23587 {
23588 /* Try to avoid unnecessary moves if part of the result
23589 is in the right place already. */
23590 if (src1 != dest)
23591 emit_move_insn (destlo, operands[1]);
23592 if (src2 != dest + halfregs)
23593 emit_move_insn (desthi, operands[2]);
23594 }
23595 else
23596 {
23597 if (src2 != dest + halfregs)
23598 emit_move_insn (desthi, operands[2]);
23599 if (src1 != dest)
23600 emit_move_insn (destlo, operands[1]);
23601 }
23602 }
23603 \f
23604 /* Return the number (counting from 0) of
23605 the least significant set bit in MASK. */
23606
23607 inline static int
23608 number_of_first_bit_set (unsigned mask)
23609 {
23610 return ctz_hwi (mask);
23611 }
23612
23613 /* Like emit_multi_reg_push, but allowing for a different set of
23614 registers to be described as saved. MASK is the set of registers
23615 to be saved; REAL_REGS is the set of registers to be described as
23616 saved. If REAL_REGS is 0, only describe the stack adjustment. */
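 /* For example, MASK == 0x90 (r4 and r7) emits, roughly, "push {r4, r7}",
 while REAL_REGS controls which registers the unwind notes describe as
 stored in those two stack slots. */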
23617
23618 static rtx_insn *
23619 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23620 {
23621 unsigned long regno;
23622 rtx par[10], tmp, reg;
23623 rtx_insn *insn;
23624 int i, j;
23625
23626 /* Build the parallel of the registers actually being stored. */
23627 for (i = 0; mask; ++i, mask &= mask - 1)
23628 {
23629 regno = ctz_hwi (mask);
23630 reg = gen_rtx_REG (SImode, regno);
23631
23632 if (i == 0)
23633 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23634 else
23635 tmp = gen_rtx_USE (VOIDmode, reg);
23636
23637 par[i] = tmp;
23638 }
23639
23640 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23641 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23642 tmp = gen_frame_mem (BLKmode, tmp);
23643 tmp = gen_rtx_SET (tmp, par[0]);
23644 par[0] = tmp;
23645
23646 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23647 insn = emit_insn (tmp);
23648
23649 /* Always build the stack adjustment note for unwind info. */
23650 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23651 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23652 par[0] = tmp;
23653
23654 /* Build the parallel of the registers recorded as saved for unwind. */
23655 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23656 {
23657 regno = ctz_hwi (real_regs);
23658 reg = gen_rtx_REG (SImode, regno);
23659
23660 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23661 tmp = gen_frame_mem (SImode, tmp);
23662 tmp = gen_rtx_SET (tmp, reg);
23663 RTX_FRAME_RELATED_P (tmp) = 1;
23664 par[j + 1] = tmp;
23665 }
23666
23667 if (j == 0)
23668 tmp = par[0];
23669 else
23670 {
23671 RTX_FRAME_RELATED_P (par[0]) = 1;
23672 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23673 }
23674
23675 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23676
23677 return insn;
23678 }
23679
23680 /* Emit code to pop registers from the stack. F is the
23681 assembly file. MASK is the registers to pop. */
23682 static void
23683 thumb_pop (FILE *f, unsigned long mask)
23684 {
23685 int regno;
23686 int lo_mask = mask & 0xFF;
23687 int pushed_words = 0;
23688
23689 gcc_assert (mask);
23690
23691 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23692 {
23693 /* Special case. Do not generate a POP PC statement here, do it in
23694 thumb_exit() */
23695 thumb_exit (f, -1);
23696 return;
23697 }
23698
23699 fprintf (f, "\tpop\t{");
23700
23701 /* Look at the low registers first. */
23702 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23703 {
23704 if (lo_mask & 1)
23705 {
23706 asm_fprintf (f, "%r", regno);
23707
23708 if ((lo_mask & ~1) != 0)
23709 fprintf (f, ", ");
23710
23711 pushed_words++;
23712 }
23713 }
23714
23715 if (mask & (1 << PC_REGNUM))
23716 {
23717 /* Catch popping the PC. */
23718 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23719 || IS_CMSE_ENTRY (arm_current_func_type ()))
23720 {
23721 /* The PC is never popped directly; instead
23722 it is popped into r3 and then BX is used. */
23723 fprintf (f, "}\n");
23724
23725 thumb_exit (f, -1);
23726
23727 return;
23728 }
23729 else
23730 {
23731 if (mask & 0xFF)
23732 fprintf (f, ", ");
23733
23734 asm_fprintf (f, "%r", PC_REGNUM);
23735 }
23736 }
23737
23738 fprintf (f, "}\n");
23739 }
23740
23741 /* Generate code to return from a thumb function.
23742 If 'reg_containing_return_addr' is -1, then the return address is
23743 actually on the stack, at the stack pointer. */
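 /* In the simplest case (nothing left to pop and no backtrace structure) this
 just emits "bx <reg>" (or "bxns" for a CMSE entry function); otherwise the
 stacked return address and, for TARGET_BACKTRACE, the saved frame and stack
 pointers are recovered through whichever argument registers the return
 value leaves free. */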
23744 static void
23745 thumb_exit (FILE *f, int reg_containing_return_addr)
23746 {
23747 unsigned regs_available_for_popping;
23748 unsigned regs_to_pop;
23749 int pops_needed;
23750 unsigned available;
23751 unsigned required;
23752 machine_mode mode;
23753 int size;
23754 int restore_a4 = FALSE;
23755
23756 /* Compute the registers we need to pop. */
23757 regs_to_pop = 0;
23758 pops_needed = 0;
23759
23760 if (reg_containing_return_addr == -1)
23761 {
23762 regs_to_pop |= 1 << LR_REGNUM;
23763 ++pops_needed;
23764 }
23765
23766 if (TARGET_BACKTRACE)
23767 {
23768 /* Restore the (ARM) frame pointer and stack pointer. */
23769 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23770 pops_needed += 2;
23771 }
23772
23773 /* If there is nothing to pop then just emit the BX instruction and
23774 return. */
23775 if (pops_needed == 0)
23776 {
23777 if (crtl->calls_eh_return)
23778 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23779
23780 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23781 {
23782 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23783 reg_containing_return_addr);
23784 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23785 }
23786 else
23787 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23788 return;
23789 }
23790 /* Otherwise if we are not supporting interworking and we have not created
23791 a backtrace structure and the function was not entered in ARM mode then
23792 just pop the return address straight into the PC. */
23793 else if (!TARGET_INTERWORK
23794 && !TARGET_BACKTRACE
23795 && !is_called_in_ARM_mode (current_function_decl)
23796 && !crtl->calls_eh_return
23797 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23798 {
23799 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23800 return;
23801 }
23802
23803 /* Find out how many of the (return) argument registers we can corrupt. */
23804 regs_available_for_popping = 0;
23805
23806 /* If returning via __builtin_eh_return, the bottom three registers
23807 all contain information needed for the return. */
23808 if (crtl->calls_eh_return)
23809 size = 12;
23810 else
23811 {
23812 /* We can deduce the registers used from the function's
23813 return value. This is more reliable than examining
23814 df_regs_ever_live_p () because that will be set if the register is
23815 ever used in the function, not just if the register is used
23816 to hold a return value. */
23817
23818 if (crtl->return_rtx != 0)
23819 mode = GET_MODE (crtl->return_rtx);
23820 else
23821 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23822
23823 size = GET_MODE_SIZE (mode);
23824
23825 if (size == 0)
23826 {
23827 /* In a void function we can use any argument register.
23828 In a function that returns a structure on the stack
23829 we can use the second and third argument registers. */
23830 if (mode == VOIDmode)
23831 regs_available_for_popping =
23832 (1 << ARG_REGISTER (1))
23833 | (1 << ARG_REGISTER (2))
23834 | (1 << ARG_REGISTER (3));
23835 else
23836 regs_available_for_popping =
23837 (1 << ARG_REGISTER (2))
23838 | (1 << ARG_REGISTER (3));
23839 }
23840 else if (size <= 4)
23841 regs_available_for_popping =
23842 (1 << ARG_REGISTER (2))
23843 | (1 << ARG_REGISTER (3));
23844 else if (size <= 8)
23845 regs_available_for_popping =
23846 (1 << ARG_REGISTER (3));
23847 }
23848
23849 /* Match registers to be popped with registers into which we pop them. */
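 /* X & -X isolates the lowest set bit, so each iteration pairs off the
 lowest remaining register on each side. */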
23850 for (available = regs_available_for_popping,
23851 required = regs_to_pop;
23852 required != 0 && available != 0;
23853 available &= ~(available & - available),
23854 required &= ~(required & - required))
23855 -- pops_needed;
23856
23857 /* If we have any popping registers left over, remove them. */
23858 if (available > 0)
23859 regs_available_for_popping &= ~available;
23860
23861 /* Otherwise if we need another popping register we can use
23862 the fourth argument register. */
23863 else if (pops_needed)
23864 {
23865 /* If we have not found any free argument registers and
23866 reg a4 contains the return address, we must move it. */
23867 if (regs_available_for_popping == 0
23868 && reg_containing_return_addr == LAST_ARG_REGNUM)
23869 {
23870 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23871 reg_containing_return_addr = LR_REGNUM;
23872 }
23873 else if (size > 12)
23874 {
23875 /* Register a4 is being used to hold part of the return value,
23876 but we have dire need of a free, low register. */
23877 restore_a4 = TRUE;
23878
23879 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23880 }
23881
23882 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23883 {
23884 /* The fourth argument register is available. */
23885 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23886
23887 --pops_needed;
23888 }
23889 }
23890
23891 /* Pop as many registers as we can. */
23892 thumb_pop (f, regs_available_for_popping);
23893
23894 /* Process the registers we popped. */
23895 if (reg_containing_return_addr == -1)
23896 {
23897 /* The return address was popped into the lowest numbered register. */
23898 regs_to_pop &= ~(1 << LR_REGNUM);
23899
23900 reg_containing_return_addr =
23901 number_of_first_bit_set (regs_available_for_popping);
23902
23903 /* Remove this register from the mask of available registers, so that
23904 the return address will not be corrupted by further pops. */
23905 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23906 }
23907
23908 /* If we popped other registers then handle them here. */
23909 if (regs_available_for_popping)
23910 {
23911 int frame_pointer;
23912
23913 /* Work out which register currently contains the frame pointer. */
23914 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23915
23916 /* Move it into the correct place. */
23917 asm_fprintf (f, "\tmov\t%r, %r\n",
23918 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23919
23920 /* (Temporarily) remove it from the mask of popped registers. */
23921 regs_available_for_popping &= ~(1 << frame_pointer);
23922 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23923
23924 if (regs_available_for_popping)
23925 {
23926 int stack_pointer;
23927
23928 /* We popped the stack pointer as well;
23929 find the register that contains it. */
23930 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23931
23932 /* Move it into the stack register. */
23933 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23934
23935 /* At this point we have popped all necessary registers, so
23936 do not worry about restoring regs_available_for_popping
23937 to its correct value:
23938
23939 assert (pops_needed == 0)
23940 assert (regs_available_for_popping == (1 << frame_pointer))
23941 assert (regs_to_pop == (1 << STACK_POINTER)) */
23942 }
23943 else
23944 {
23945 /* Since we have just moved the popped value into the frame
23946 pointer, the popping register is available for reuse, and
23947 we know that we still have the stack pointer left to pop. */
23948 regs_available_for_popping |= (1 << frame_pointer);
23949 }
23950 }
23951
23952 /* If we still have registers left on the stack, but we no longer have
23953 any registers into which we can pop them, then we must move the return
23954 address into the link register and make available the register that
23955 contained it. */
23956 if (regs_available_for_popping == 0 && pops_needed > 0)
23957 {
23958 regs_available_for_popping |= 1 << reg_containing_return_addr;
23959
23960 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23961 reg_containing_return_addr);
23962
23963 reg_containing_return_addr = LR_REGNUM;
23964 }
23965
23966 /* If we have registers left on the stack then pop some more.
23967 We know that at most we will want to pop FP and SP. */
23968 if (pops_needed > 0)
23969 {
23970 int popped_into;
23971 int move_to;
23972
23973 thumb_pop (f, regs_available_for_popping);
23974
23975 /* We have popped either FP or SP.
23976 Move whichever one it is into the correct register. */
23977 popped_into = number_of_first_bit_set (regs_available_for_popping);
23978 move_to = number_of_first_bit_set (regs_to_pop);
23979
23980 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23981
23982 regs_to_pop &= ~(1 << move_to);
23983
23984 --pops_needed;
23985 }
23986
23987 /* If we still have not popped everything then we must have only
23988 had one register available to us and we are now popping the SP. */
23989 if (pops_needed > 0)
23990 {
23991 int popped_into;
23992
23993 thumb_pop (f, regs_available_for_popping);
23994
23995 popped_into = number_of_first_bit_set (regs_available_for_popping);
23996
23997 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23998 /*
23999 assert (regs_to_pop == (1 << STACK_POINTER))
24000 assert (pops_needed == 1)
24001 */
24002 }
24003
24004 /* If necessary restore the a4 register. */
24005 if (restore_a4)
24006 {
24007 if (reg_containing_return_addr != LR_REGNUM)
24008 {
24009 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24010 reg_containing_return_addr = LR_REGNUM;
24011 }
24012
24013 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24014 }
24015
24016 if (crtl->calls_eh_return)
24017 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24018
24019 /* Return to caller. */
24020 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24021 {
24022 /* This is for the cases where LR is not being used to contain the return
24023 address. It may therefore contain information that we might not want
24024 to leak, hence it must be cleared. The value in R0 will never be a
24025 secret at this point, so it is safe to use it, see the clearing code
24026 in 'cmse_nonsecure_entry_clear_before_return'. */
24027 if (reg_containing_return_addr != LR_REGNUM)
24028 asm_fprintf (f, "\tmov\tlr, r0\n");
24029
24030 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24031 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24032 }
24033 else
24034 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24035 }
24036 \f
24037 /* Scan INSN just before assembler is output for it.
24038 For Thumb-1, we track the status of the condition codes; this
24039 information is used in the cbranchsi4_insn pattern. */
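 /* For example, after an instruction whose "conds" attribute is CONDS_SET we
 remember its destination register and a zero comparison, so that a later
 cbranchsi4_insn comparing that register with zero can, where the recorded
 mode allows, reuse the flags rather than emitting a fresh compare. */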
24040 void
24041 thumb1_final_prescan_insn (rtx_insn *insn)
24042 {
24043 if (flag_print_asm_name)
24044 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24045 INSN_ADDRESSES (INSN_UID (insn)));
24046 /* Don't overwrite the previous setter when we get to a cbranch. */
24047 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24048 {
24049 enum attr_conds conds;
24050
24051 if (cfun->machine->thumb1_cc_insn)
24052 {
24053 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24054 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24055 CC_STATUS_INIT;
24056 }
24057 conds = get_attr_conds (insn);
24058 if (conds == CONDS_SET)
24059 {
24060 rtx set = single_set (insn);
24061 cfun->machine->thumb1_cc_insn = insn;
24062 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24063 cfun->machine->thumb1_cc_op1 = const0_rtx;
24064 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24065 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24066 {
24067 rtx src1 = XEXP (SET_SRC (set), 1);
24068 if (src1 == const0_rtx)
24069 cfun->machine->thumb1_cc_mode = CCmode;
24070 }
24071 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24072 {
24073 /* Record the src register operand instead of dest because
24074 cprop_hardreg pass propagates src. */
24075 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24076 }
24077 }
24078 else if (conds != CONDS_NOCOND)
24079 cfun->machine->thumb1_cc_insn = NULL_RTX;
24080 }
24081
24082 /* Check if an unexpected far jump is used. */
24083 if (cfun->machine->lr_save_eliminated
24084 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24085 internal_error("Unexpected thumb1 far jump");
24086 }
24087
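 /* Return 1 if VAL (viewed as a 32-bit value) is a nonzero 8-bit constant
 shifted left by up to 24 bits, e.g. something that can be built with a move
 of an 8-bit immediate followed by a left shift; return 0 otherwise. */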
24088 int
24089 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24090 {
24091 unsigned HOST_WIDE_INT mask = 0xff;
24092 int i;
24093
24094 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24095 if (val == 0) /* XXX */
24096 return 0;
24097
24098 for (i = 0; i < 25; i++)
24099 if ((val & (mask << i)) == val)
24100 return 1;
24101
24102 return 0;
24103 }
24104
24105 /* Returns nonzero if the current function contains,
24106 or might contain, a far jump. */
24107 static int
24108 thumb_far_jump_used_p (void)
24109 {
24110 rtx_insn *insn;
24111 bool far_jump = false;
24112 unsigned int func_size = 0;
24113
24114 /* If we have already decided that far jumps may be used,
24115 do not bother checking again, and always return true even if
24116 it turns out that they are not being used. Once we have made
24117 the decision that far jumps are present (and hence that the link
24118 register will be pushed onto the stack) we cannot go back on it. */
24119 if (cfun->machine->far_jump_used)
24120 return 1;
24121
24122 /* If this function is not being called from the prologue/epilogue
24123 generation code then it must be being called from the
24124 INITIAL_ELIMINATION_OFFSET macro. */
24125 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24126 {
24127 /* In this case we know that we are being asked about the elimination
24128 of the arg pointer register. If that register is not being used,
24129 then there are no arguments on the stack, and we do not have to
24130 worry that a far jump might force the prologue to push the link
24131 register, changing the stack offsets. In this case we can just
24132 return false, since the presence of far jumps in the function will
24133 not affect stack offsets.
24134
24135 If the arg pointer is live (or if it was live, but has now been
24136 eliminated and so set to dead) then we do have to test to see if
24137 the function might contain a far jump. This test can lead to some
24138 false negatives, since before reload is completed the length of
24139 branch instructions is not known, so gcc defaults to returning their
24140 longest length, which in turn sets the far jump attribute to true.
24141
24142 A false negative will not result in bad code being generated, but it
24143 will result in a needless push and pop of the link register. We
24144 hope that this does not occur too often.
24145
24146 If we need doubleword stack alignment this could affect the other
24147 elimination offsets so we can't risk getting it wrong. */
24148 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24149 cfun->machine->arg_pointer_live = 1;
24150 else if (!cfun->machine->arg_pointer_live)
24151 return 0;
24152 }
24153
24154 /* We should not change far_jump_used during or after reload, as there is
24155 no chance to change stack frame layout. */
24156 if (reload_in_progress || reload_completed)
24157 return 0;
24158
24159 /* Check to see if the function contains a branch
24160 insn with the far jump attribute set. */
24161 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24162 {
24163 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24164 {
24165 far_jump = true;
24166 }
24167 func_size += get_attr_length (insn);
24168 }
24169
24170 /* Attribute far_jump will always be true for thumb1 before
24171 shorten_branch pass. So checking the far_jump attribute before
24172 shorten_branch isn't very useful.
24173
24174 Following heuristic tries to estimate more accurately if a far jump
24175 may finally be used. The heuristic is very conservative as there is
24176 no chance to roll back the decision not to use a far jump.
24177
24178 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24179 2-byte insn is associated with a 4-byte constant pool entry. Using
24180 function size 2048/3 as the threshold is conservative enough. */
24181 if (far_jump)
24182 {
24183 if ((func_size * 3) >= 2048)
24184 {
24185 /* Record the fact that we have decided that
24186 the function does use far jumps. */
24187 cfun->machine->far_jump_used = 1;
24188 return 1;
24189 }
24190 }
24191
24192 return 0;
24193 }
24194
24195 /* Return nonzero if FUNC must be entered in ARM mode. */
24196 static bool
24197 is_called_in_ARM_mode (tree func)
24198 {
24199 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24200
24201 /* Ignore the problem of functions whose address is taken. */
24202 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24203 return true;
24204
24205 #ifdef ARM_PE
24206 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24207 #else
24208 return false;
24209 #endif
24210 }
24211
24212 /* Given the stack offsets and register mask in OFFSETS, decide how
24213 many additional registers to push instead of subtracting a constant
24214 from SP. For epilogues the principle is the same except we use pop.
24215 FOR_PROLOGUE indicates which we're generating. */
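 /* For example, if the frame needs another 8 bytes and two low registers are
 otherwise unused, pushing them along with the normal push replaces a
 separate "sub sp, #8" (and the matching stack adjustment in the
 epilogue). */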
24216 static int
24217 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24218 {
24219 HOST_WIDE_INT amount;
24220 unsigned long live_regs_mask = offsets->saved_regs_mask;
24221 /* Extract a mask of the ones we can give to the Thumb's push/pop
24222 instruction. */
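 /* 0x40ff is r0-r7 plus LR (bit 14): PUSH can store LR directly, but POP
 cannot load it, so only r0-r7 are usable for the epilogue. */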
24223 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24224 /* Then count how many other high registers will need to be pushed. */
24225 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24226 int n_free, reg_base, size;
24227
24228 if (!for_prologue && frame_pointer_needed)
24229 amount = offsets->locals_base - offsets->saved_regs;
24230 else
24231 amount = offsets->outgoing_args - offsets->saved_regs;
24232
24233 /* If the stack frame size is 512 exactly, we can save one load
24234 instruction, which should make this a win even when optimizing
24235 for speed. */
24236 if (!optimize_size && amount != 512)
24237 return 0;
24238
24239 /* Can't do this if there are high registers to push. */
24240 if (high_regs_pushed != 0)
24241 return 0;
24242
24243 /* Shouldn't do it in the prologue if no registers would normally
24244 be pushed at all. In the epilogue, also allow it if we'll have
24245 a pop insn for the PC. */
24246 if (l_mask == 0
24247 && (for_prologue
24248 || TARGET_BACKTRACE
24249 || (live_regs_mask & 1 << LR_REGNUM) == 0
24250 || TARGET_INTERWORK
24251 || crtl->args.pretend_args_size != 0))
24252 return 0;
24253
24254 /* Don't do this if thumb_expand_prologue wants to emit instructions
24255 between the push and the stack frame allocation. */
24256 if (for_prologue
24257 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24258 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24259 return 0;
24260
24261 reg_base = 0;
24262 n_free = 0;
24263 if (!for_prologue)
24264 {
24265 size = arm_size_return_regs ();
24266 reg_base = ARM_NUM_INTS (size);
24267 live_regs_mask >>= reg_base;
24268 }
24269
24270 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24271 && (for_prologue || call_used_regs[reg_base + n_free]))
24272 {
24273 live_regs_mask >>= 1;
24274 n_free++;
24275 }
24276
24277 if (n_free == 0)
24278 return 0;
24279 gcc_assert (amount / 4 * 4 == amount);
24280
24281 if (amount >= 512 && (amount - n_free * 4) < 512)
24282 return (amount - 508) / 4;
24283 if (amount <= n_free * 4)
24284 return amount / 4;
24285 return 0;
24286 }
24287
24288 /* The bits which aren't usefully expanded as rtl. */
24289 const char *
24290 thumb1_unexpanded_epilogue (void)
24291 {
24292 arm_stack_offsets *offsets;
24293 int regno;
24294 unsigned long live_regs_mask = 0;
24295 int high_regs_pushed = 0;
24296 int extra_pop;
24297 int had_to_push_lr;
24298 int size;
24299
24300 if (cfun->machine->return_used_this_function != 0)
24301 return "";
24302
24303 if (IS_NAKED (arm_current_func_type ()))
24304 return "";
24305
24306 offsets = arm_get_frame_offsets ();
24307 live_regs_mask = offsets->saved_regs_mask;
24308 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24309
24310 /* We can deduce the registers used from the function's return value.
24311 This is more reliable than examining df_regs_ever_live_p () because that
24312 will be set if the register is ever used in the function, not just if
24313 the register is used to hold a return value. */
24314 size = arm_size_return_regs ();
24315
24316 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24317 if (extra_pop > 0)
24318 {
24319 unsigned long extra_mask = (1 << extra_pop) - 1;
24320 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24321 }
24322
24323 /* The prolog may have pushed some high registers to use as
24324 work registers, e.g. the testsuite file:
24325 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24326 compiles to produce:
24327 push {r4, r5, r6, r7, lr}
24328 mov r7, r9
24329 mov r6, r8
24330 push {r6, r7}
24331 as part of the prolog. We have to undo that pushing here. */
24332
24333 if (high_regs_pushed)
24334 {
24335 unsigned long mask = live_regs_mask & 0xff;
24336 int next_hi_reg;
24337
24338 /* The available low registers depend on the size of the value we are
24339 returning. */
24340 if (size <= 12)
24341 mask |= 1 << 3;
24342 if (size <= 8)
24343 mask |= 1 << 2;
24344
24345 if (mask == 0)
24346 /* Oh dear! We have no low registers into which we can pop
24347 high registers! */
24348 internal_error
24349 ("no low registers available for popping high registers");
24350
24351 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24352 if (live_regs_mask & (1 << next_hi_reg))
24353 break;
24354
24355 while (high_regs_pushed)
24356 {
24357 /* Find lo register(s) into which the high register(s) can
24358 be popped. */
24359 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24360 {
24361 if (mask & (1 << regno))
24362 high_regs_pushed--;
24363 if (high_regs_pushed == 0)
24364 break;
24365 }
24366
24367 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24368
24369 /* Pop the values into the low register(s). */
24370 thumb_pop (asm_out_file, mask);
24371
24372 /* Move the value(s) into the high registers. */
24373 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24374 {
24375 if (mask & (1 << regno))
24376 {
24377 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24378 regno);
24379
24380 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24381 if (live_regs_mask & (1 << next_hi_reg))
24382 break;
24383 }
24384 }
24385 }
24386 live_regs_mask &= ~0x0f00;
24387 }
24388
24389 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24390 live_regs_mask &= 0xff;
24391
24392 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24393 {
24394 /* Pop the return address into the PC. */
24395 if (had_to_push_lr)
24396 live_regs_mask |= 1 << PC_REGNUM;
24397
24398 /* Either no argument registers were pushed or a backtrace
24399 structure was created which includes an adjusted stack
24400 pointer, so just pop everything. */
24401 if (live_regs_mask)
24402 thumb_pop (asm_out_file, live_regs_mask);
24403
24404 /* We have either just popped the return address into the
24405 PC or it was kept in LR for the entire function.
24406 Note that thumb_pop has already called thumb_exit if the
24407 PC was in the list. */
24408 if (!had_to_push_lr)
24409 thumb_exit (asm_out_file, LR_REGNUM);
24410 }
24411 else
24412 {
24413 /* Pop everything but the return address. */
24414 if (live_regs_mask)
24415 thumb_pop (asm_out_file, live_regs_mask);
24416
24417 if (had_to_push_lr)
24418 {
24419 if (size > 12)
24420 {
24421 /* We have no free low regs, so save one. */
24422 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24423 LAST_ARG_REGNUM);
24424 }
24425
24426 /* Get the return address into a temporary register. */
24427 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24428
24429 if (size > 12)
24430 {
24431 /* Move the return address to lr. */
24432 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24433 LAST_ARG_REGNUM);
24434 /* Restore the low register. */
24435 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24436 IP_REGNUM);
24437 regno = LR_REGNUM;
24438 }
24439 else
24440 regno = LAST_ARG_REGNUM;
24441 }
24442 else
24443 regno = LR_REGNUM;
24444
24445 /* Remove the argument registers that were pushed onto the stack. */
24446 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24447 SP_REGNUM, SP_REGNUM,
24448 crtl->args.pretend_args_size);
24449
24450 thumb_exit (asm_out_file, regno);
24451 }
24452
24453 return "";
24454 }
24455
24456 /* Functions to save and restore machine-specific function data. */
24457 static struct machine_function *
24458 arm_init_machine_status (void)
24459 {
24460 struct machine_function *machine;
24461 machine = ggc_cleared_alloc<machine_function> ();
24462
24463 #if ARM_FT_UNKNOWN != 0
24464 machine->func_type = ARM_FT_UNKNOWN;
24465 #endif
24466 return machine;
24467 }
24468
24469 /* Return an RTX indicating where the return address to the
24470 calling function can be found. */
24471 rtx
24472 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24473 {
24474 if (count != 0)
24475 return NULL_RTX;
24476
24477 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24478 }
24479
24480 /* Do anything needed before RTL is emitted for each function. */
24481 void
24482 arm_init_expanders (void)
24483 {
24484 /* Arrange to initialize and mark the machine per-function status. */
24485 init_machine_status = arm_init_machine_status;
24486
24487 /* This is to stop the combine pass optimizing away the alignment
24488 adjustment of va_arg. */
24489 /* ??? It is claimed that this should not be necessary. */
24490 if (cfun)
24491 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24492 }
24493
24494 /* Return true if FUNC is compiled in a different ARM/Thumb mode from the current one. */
24495
24496 bool
24497 arm_change_mode_p (tree func)
24498 {
24499 if (TREE_CODE (func) != FUNCTION_DECL)
24500 return false;
24501
24502 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24503
24504 if (!callee_tree)
24505 callee_tree = target_option_default_node;
24506
24507 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24508 int flags = callee_opts->x_target_flags;
24509
24510 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24511 }
24512
24513 /* Like arm_compute_initial_elimination_offset. Simpler because there
24514 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24515 to point at the base of the local variables after static stack
24516 space for a function has been allocated. */
24517
24518 HOST_WIDE_INT
24519 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24520 {
24521 arm_stack_offsets *offsets;
24522
24523 offsets = arm_get_frame_offsets ();
24524
24525 switch (from)
24526 {
24527 case ARG_POINTER_REGNUM:
24528 switch (to)
24529 {
24530 case STACK_POINTER_REGNUM:
24531 return offsets->outgoing_args - offsets->saved_args;
24532
24533 case FRAME_POINTER_REGNUM:
24534 return offsets->soft_frame - offsets->saved_args;
24535
24536 case ARM_HARD_FRAME_POINTER_REGNUM:
24537 return offsets->saved_regs - offsets->saved_args;
24538
24539 case THUMB_HARD_FRAME_POINTER_REGNUM:
24540 return offsets->locals_base - offsets->saved_args;
24541
24542 default:
24543 gcc_unreachable ();
24544 }
24545 break;
24546
24547 case FRAME_POINTER_REGNUM:
24548 switch (to)
24549 {
24550 case STACK_POINTER_REGNUM:
24551 return offsets->outgoing_args - offsets->soft_frame;
24552
24553 case ARM_HARD_FRAME_POINTER_REGNUM:
24554 return offsets->saved_regs - offsets->soft_frame;
24555
24556 case THUMB_HARD_FRAME_POINTER_REGNUM:
24557 return offsets->locals_base - offsets->soft_frame;
24558
24559 default:
24560 gcc_unreachable ();
24561 }
24562 break;
24563
24564 default:
24565 gcc_unreachable ();
24566 }
24567 }
24568
24569 /* Generate the function's prologue. */
24570
24571 void
24572 thumb1_expand_prologue (void)
24573 {
24574 rtx_insn *insn;
24575
24576 HOST_WIDE_INT amount;
24577 HOST_WIDE_INT size;
24578 arm_stack_offsets *offsets;
24579 unsigned long func_type;
24580 int regno;
24581 unsigned long live_regs_mask;
24582 unsigned long l_mask;
24583 unsigned high_regs_pushed = 0;
24584 bool lr_needs_saving;
24585
24586 func_type = arm_current_func_type ();
24587
24588 /* Naked functions don't have prologues. */
24589 if (IS_NAKED (func_type))
24590 {
24591 if (flag_stack_usage_info)
24592 current_function_static_stack_size = 0;
24593 return;
24594 }
24595
24596 if (IS_INTERRUPT (func_type))
24597 {
24598 error ("interrupt Service Routines cannot be coded in Thumb mode");
24599 return;
24600 }
24601
24602 if (is_called_in_ARM_mode (current_function_decl))
24603 emit_insn (gen_prologue_thumb1_interwork ());
24604
24605 offsets = arm_get_frame_offsets ();
24606 live_regs_mask = offsets->saved_regs_mask;
24607 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24608
24609 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24610 l_mask = live_regs_mask & 0x40ff;
24611 /* Then count how many other high registers will need to be pushed. */
24612 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24613
24614 if (crtl->args.pretend_args_size)
24615 {
24616 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24617
24618 if (cfun->machine->uses_anonymous_args)
24619 {
24620 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24621 unsigned long mask;
24622
24623 mask = 1ul << (LAST_ARG_REGNUM + 1);
24624 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24625
24626 insn = thumb1_emit_multi_reg_push (mask, 0);
24627 }
24628 else
24629 {
24630 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24631 stack_pointer_rtx, x));
24632 }
24633 RTX_FRAME_RELATED_P (insn) = 1;
24634 }
24635
24636 if (TARGET_BACKTRACE)
24637 {
24638 HOST_WIDE_INT offset = 0;
24639 unsigned work_register;
24640 rtx work_reg, x, arm_hfp_rtx;
24641
24642 /* We have been asked to create a stack backtrace structure.
24643 The code looks like this:
24644
24645 0 .align 2
24646 0 func:
24647 0 sub SP, #16 Reserve space for 4 registers.
24648 2 push {R7} Push low registers.
24649 4 add R7, SP, #20 Get the stack pointer before the push.
24650 6 str R7, [SP, #8] Store the stack pointer
24651 (before reserving the space).
24652 8 mov R7, PC Get hold of the start of this code + 12.
24653 10 str R7, [SP, #16] Store it.
24654 12 mov R7, FP Get hold of the current frame pointer.
24655 14 str R7, [SP, #4] Store it.
24656 16 mov R7, LR Get hold of the current return address.
24657 18 str R7, [SP, #12] Store it.
24658 20 add R7, SP, #16 Point at the start of the
24659 backtrace structure.
24660 22 mov FP, R7 Put this value into the frame pointer. */
24661
24662 work_register = thumb_find_work_register (live_regs_mask);
24663 work_reg = gen_rtx_REG (SImode, work_register);
24664 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24665
24666 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24667 stack_pointer_rtx, GEN_INT (-16)));
24668 RTX_FRAME_RELATED_P (insn) = 1;
24669
24670 if (l_mask)
24671 {
24672 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24673 RTX_FRAME_RELATED_P (insn) = 1;
24674 lr_needs_saving = false;
24675
24676 offset = bit_count (l_mask) * UNITS_PER_WORD;
24677 }
24678
24679 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24680 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24681
24682 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24683 x = gen_frame_mem (SImode, x);
24684 emit_move_insn (x, work_reg);
24685
24686 /* Make sure that the instruction fetching the PC is in the right place
24687 to calculate "start of backtrace creation code + 12". */
24688 /* ??? The stores using the common WORK_REG ought to be enough to
24689 prevent the scheduler from doing anything weird. Failing that
24690 we could always move all of the following into an UNSPEC_VOLATILE. */
24691 if (l_mask)
24692 {
24693 x = gen_rtx_REG (SImode, PC_REGNUM);
24694 emit_move_insn (work_reg, x);
24695
24696 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24697 x = gen_frame_mem (SImode, x);
24698 emit_move_insn (x, work_reg);
24699
24700 emit_move_insn (work_reg, arm_hfp_rtx);
24701
24702 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24703 x = gen_frame_mem (SImode, x);
24704 emit_move_insn (x, work_reg);
24705 }
24706 else
24707 {
24708 emit_move_insn (work_reg, arm_hfp_rtx);
24709
24710 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24711 x = gen_frame_mem (SImode, x);
24712 emit_move_insn (x, work_reg);
24713
24714 x = gen_rtx_REG (SImode, PC_REGNUM);
24715 emit_move_insn (work_reg, x);
24716
24717 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24718 x = gen_frame_mem (SImode, x);
24719 emit_move_insn (x, work_reg);
24720 }
24721
24722 x = gen_rtx_REG (SImode, LR_REGNUM);
24723 emit_move_insn (work_reg, x);
24724
24725 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24726 x = gen_frame_mem (SImode, x);
24727 emit_move_insn (x, work_reg);
24728
24729 x = GEN_INT (offset + 12);
24730 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24731
24732 emit_move_insn (arm_hfp_rtx, work_reg);
24733 }
24734 /* Optimization: If we are not pushing any low registers but we are going
24735 to push some high registers then delay our first push. This will just
24736 be a push of LR and we can combine it with the push of the first high
24737 register. */
24738 else if ((l_mask & 0xff) != 0
24739 || (high_regs_pushed == 0 && lr_needs_saving))
24740 {
24741 unsigned long mask = l_mask;
24742 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24743 insn = thumb1_emit_multi_reg_push (mask, mask);
24744 RTX_FRAME_RELATED_P (insn) = 1;
24745 lr_needs_saving = false;
24746 }
24747
24748 if (high_regs_pushed)
24749 {
24750 unsigned pushable_regs;
24751 unsigned next_hi_reg;
24752 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24753 : crtl->args.info.nregs;
24754 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24755
24756 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24757 if (live_regs_mask & (1 << next_hi_reg))
24758 break;
24759
24760 /* Here we need to mask out registers used for passing arguments,
24761 even if they could be pushed. This is to avoid using them to stash the high
24762 registers, since such stashing would clobber the argument values. */
24763 pushable_regs = l_mask & (~arg_regs_mask);
24764 if (lr_needs_saving)
24765 pushable_regs &= ~(1 << LR_REGNUM);
24766
24767 if (pushable_regs == 0)
24768 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24769
24770 while (high_regs_pushed > 0)
24771 {
24772 unsigned long real_regs_mask = 0;
24773 unsigned long push_mask = 0;
24774
24775 for (regno = LR_REGNUM; regno >= 0; regno --)
24776 {
24777 if (pushable_regs & (1 << regno))
24778 {
24779 emit_move_insn (gen_rtx_REG (SImode, regno),
24780 gen_rtx_REG (SImode, next_hi_reg));
24781
24782 high_regs_pushed --;
24783 real_regs_mask |= (1 << next_hi_reg);
24784 push_mask |= (1 << regno);
24785
24786 if (high_regs_pushed)
24787 {
24788 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24789 next_hi_reg --)
24790 if (live_regs_mask & (1 << next_hi_reg))
24791 break;
24792 }
24793 else
24794 break;
24795 }
24796 }
24797
24798 /* If we had to find a work register and we have not yet
24799 saved the LR then add it to the list of regs to push. */
24800 if (lr_needs_saving)
24801 {
24802 push_mask |= 1 << LR_REGNUM;
24803 real_regs_mask |= 1 << LR_REGNUM;
24804 lr_needs_saving = false;
24805 }
24806
24807 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24808 RTX_FRAME_RELATED_P (insn) = 1;
24809 }
24810 }
24811
24812 /* Load the pic register before setting the frame pointer,
24813 so we can use r7 as a temporary work register. */
24814 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24815 arm_load_pic_register (live_regs_mask);
24816
24817 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24818 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24819 stack_pointer_rtx);
24820
24821 size = offsets->outgoing_args - offsets->saved_args;
24822 if (flag_stack_usage_info)
24823 current_function_static_stack_size = size;
24824
24825 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24826 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24827 sorry ("-fstack-check=specific for Thumb-1");
24828
24829 amount = offsets->outgoing_args - offsets->saved_regs;
24830 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24831 if (amount)
24832 {
24833 if (amount < 512)
24834 {
24835 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24836 GEN_INT (- amount)));
24837 RTX_FRAME_RELATED_P (insn) = 1;
24838 }
24839 else
24840 {
24841 rtx reg, dwarf;
24842
24843 /* The stack decrement is too big for an immediate value in a single
24844 insn. In theory we could issue multiple subtracts, but after
24845 three of them it becomes more space efficient to place the full
24846 value in the constant pool and load into a register. (Also the
24847 ARM debugger really likes to see only one stack decrement per
24848 function). So instead we look for a scratch register into which
24849 we can load the decrement, and then we subtract this from the
24850 stack pointer. Unfortunately on the thumb the only available
24851 scratch registers are the argument registers, and we cannot use
24852 these as they may hold arguments to the function. Instead we
24853 attempt to locate a call preserved register which is used by this
24854 function. If we can find one, then we know that it will have
24855 been pushed at the start of the prologue and so we can corrupt
24856 it now. */
24857 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24858 if (live_regs_mask & (1 << regno))
24859 break;
24860
24861 gcc_assert(regno <= LAST_LO_REGNUM);
24862
24863 reg = gen_rtx_REG (SImode, regno);
24864
24865 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24866
24867 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24868 stack_pointer_rtx, reg));
24869
24870 dwarf = gen_rtx_SET (stack_pointer_rtx,
24871 plus_constant (Pmode, stack_pointer_rtx,
24872 -amount));
24873 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24874 RTX_FRAME_RELATED_P (insn) = 1;
24875 }
24876 }
24877
24878 if (frame_pointer_needed)
24879 thumb_set_frame_pointer (offsets);
24880
24881 /* If we are profiling, make sure no instructions are scheduled before
24882 the call to mcount. Similarly if the user has requested no
24883 scheduling in the prolog. Similarly if we want non-call exceptions
24884 using the EABI unwinder, to prevent faulting instructions from being
24885 swapped with a stack adjustment. */
24886 if (crtl->profile || !TARGET_SCHED_PROLOG
24887 || (arm_except_unwind_info (&global_options) == UI_TARGET
24888 && cfun->can_throw_non_call_exceptions))
24889 emit_insn (gen_blockage ());
24890
24891 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24892 if (live_regs_mask & 0xff)
24893 cfun->machine->lr_save_eliminated = 0;
24894 }
24895
24896 /* Clear caller-saved registers not used to pass return values, and condition
24897 flags that might leak, before exiting a cmse_nonsecure_entry function. */
24898
24899 void
24900 cmse_nonsecure_entry_clear_before_return (void)
24901 {
24902 uint64_t to_clear_mask[2];
24903 uint32_t padding_bits_to_clear = 0;
24904 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24905 int regno, maxregno = IP_REGNUM;
24906 tree result_type;
24907 rtx result_rtl;
24908
24909 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
24910 to_clear_mask[0] |= (1ULL << IP_REGNUM);
24911
24912 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24913 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24914 to make sure the instructions used to clear them are present. */
24915 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
24916 {
24917 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
24918 maxregno = LAST_VFP_REGNUM;
24919
24920 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
24921 to_clear_mask[0] |= float_mask;
24922
24923 float_mask = (1ULL << (maxregno - 63)) - 1;
24924 to_clear_mask[1] = float_mask;
24925
24926 /* Make sure we don't clear the two scratch registers used to clear the
24927 relevant FPSCR bits in output_return_instruction. */
24928 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
24929 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
24930 emit_use (gen_rtx_REG (SImode, 4));
24931 to_clear_mask[0] &= ~(1ULL << 4);
24932 }
24933
24934 /* If the user has defined registers to be caller saved, these are no longer
24935 restored by the function before returning and must thus be cleared for
24936 security purposes. */
24937 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
24938 {
24939 /* We do not touch registers that can be used to pass arguments as per
24940 the AAPCS, since these should never be made callee-saved by user
24941 options. */
24942 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
24943 continue;
24944 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
24945 continue;
24946 if (call_used_regs[regno])
24947 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
24948 }
24949
24950 /* Make sure we do not clear the registers used to return the result in. */
24951 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
24952 if (!VOID_TYPE_P (result_type))
24953 {
24954 result_rtl = arm_function_value (result_type, current_function_decl, 0);
24955
24956 /* No need to check that we return in registers, because we don't
24957 support returning on stack yet. */
24958 to_clear_mask[0]
24959 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
24960 padding_bits_to_clear_ptr);
24961 }
24962
24963 if (padding_bits_to_clear != 0)
24964 {
24965 rtx reg_rtx;
24966 /* Padding bits to clear is not 0 so we know we are dealing with
24967 returning a composite type, which only uses r0. Let's make sure that
24968 r1-r3 are cleared too; we will use r1 as a scratch register. */
24969 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
24970
24971 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
24972
24973 /* Fill the lower half of the negated padding_bits_to_clear. */
24974 emit_move_insn (reg_rtx,
24975 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
24976
24977 /* Also fill the top half of the negated padding_bits_to_clear. */
24978 if (((~padding_bits_to_clear) >> 16) > 0)
24979 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
24980 GEN_INT (16),
24981 GEN_INT (16)),
24982 GEN_INT ((~padding_bits_to_clear) >> 16)));
24983
24984 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
24985 gen_rtx_REG (SImode, R0_REGNUM),
24986 reg_rtx));
24987 }
24988
24989 for (regno = R0_REGNUM; regno <= maxregno; regno++)
24990 {
24991 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
24992 continue;
24993
24994 if (IS_VFP_REGNUM (regno))
24995 {
24996 /* If regno is an even vfp register and its successor is also to
24997 be cleared, use vmov. */
24998 if (TARGET_VFP_DOUBLE
24999 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25000 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25001 {
25002 emit_move_insn (gen_rtx_REG (DFmode, regno),
25003 CONST1_RTX (DFmode));
25004 emit_use (gen_rtx_REG (DFmode, regno));
25005 regno++;
25006 }
25007 else
25008 {
25009 emit_move_insn (gen_rtx_REG (SFmode, regno),
25010 CONST1_RTX (SFmode));
25011 emit_use (gen_rtx_REG (SFmode, regno));
25012 }
25013 }
25014 else
25015 {
25016 if (TARGET_THUMB1)
25017 {
25018 if (regno == R0_REGNUM)
25019 emit_move_insn (gen_rtx_REG (SImode, regno),
25020 const0_rtx);
25021 else
25022 /* R0 has either been cleared before (see the code above) or it
25023 holds a return value; either way it is not secret
25024 information. */
25025 emit_move_insn (gen_rtx_REG (SImode, regno),
25026 gen_rtx_REG (SImode, R0_REGNUM));
25027 emit_use (gen_rtx_REG (SImode, regno));
25028 }
25029 else
25030 {
25031 emit_move_insn (gen_rtx_REG (SImode, regno),
25032 gen_rtx_REG (SImode, LR_REGNUM));
25033 emit_use (gen_rtx_REG (SImode, regno));
25034 }
25035 }
25036 }
25037 }
25038
25039 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
25040 POP instruction can be generated. LR should be replaced by PC. All
25041 the checks required are already done by USE_RETURN_INSN (). Hence,
25042 all we really need to check here is whether a single register or
25043 multiple registers are to be popped. */
25044 void
25045 thumb2_expand_return (bool simple_return)
25046 {
25047 int i, num_regs;
25048 unsigned long saved_regs_mask;
25049 arm_stack_offsets *offsets;
25050
25051 offsets = arm_get_frame_offsets ();
25052 saved_regs_mask = offsets->saved_regs_mask;
25053
25054 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25055 if (saved_regs_mask & (1 << i))
25056 num_regs++;
25057
25058 if (!simple_return && saved_regs_mask)
25059 {
25060 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25061 functions, or adapt the code to handle it according to the ACLE. This path should
25062 not be reachable for cmse_nonsecure_entry functions, though we prefer
25063 to assert it for now to ensure that future code changes do not silently
25064 change this behavior. */
25065 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25066 if (num_regs == 1)
25067 {
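 /* This emits, roughly,
 (parallel [(return)
 (set (reg PC) (mem (post_inc SP)))])
 i.e. a single "pop {pc}" matching the
 *pop_multiple_with_stack_update_and_return pattern. */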
25068 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25069 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25070 rtx addr = gen_rtx_MEM (SImode,
25071 gen_rtx_POST_INC (SImode,
25072 stack_pointer_rtx));
25073 set_mem_alias_set (addr, get_frame_alias_set ());
25074 XVECEXP (par, 0, 0) = ret_rtx;
25075 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25076 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25077 emit_jump_insn (par);
25078 }
25079 else
25080 {
25081 saved_regs_mask &= ~ (1 << LR_REGNUM);
25082 saved_regs_mask |= (1 << PC_REGNUM);
25083 arm_emit_multi_reg_pop (saved_regs_mask);
25084 }
25085 }
25086 else
25087 {
25088 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25089 cmse_nonsecure_entry_clear_before_return ();
25090 emit_jump_insn (simple_return_rtx);
25091 }
25092 }
25093
25094 void
25095 thumb1_expand_epilogue (void)
25096 {
25097 HOST_WIDE_INT amount;
25098 arm_stack_offsets *offsets;
25099 int regno;
25100
25101 /* Naked functions don't have epilogues. */
25102 if (IS_NAKED (arm_current_func_type ()))
25103 return;
25104
25105 offsets = arm_get_frame_offsets ();
25106 amount = offsets->outgoing_args - offsets->saved_regs;
25107
25108 if (frame_pointer_needed)
25109 {
25110 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25111 amount = offsets->locals_base - offsets->saved_regs;
25112 }
25113 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25114
25115 gcc_assert (amount >= 0);
25116 if (amount)
25117 {
25118 emit_insn (gen_blockage ());
25119
25120 if (amount < 512)
25121 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25122 GEN_INT (amount)));
25123 else
25124 {
25125 /* r3 is always free in the epilogue. */
25126 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25127
25128 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25129 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25130 }
25131 }
25132
25133 /* Emit a USE (stack_pointer_rtx), so that
25134 the stack adjustment will not be deleted. */
25135 emit_insn (gen_force_register_use (stack_pointer_rtx));
25136
25137 if (crtl->profile || !TARGET_SCHED_PROLOG)
25138 emit_insn (gen_blockage ());
25139
25140 /* Emit a clobber for each register that will be restored in the epilogue,
25141 so that flow2 will get register lifetimes correct. */
25142 for (regno = 0; regno < 13; regno++)
25143 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25144 emit_clobber (gen_rtx_REG (SImode, regno));
25145
25146 if (! df_regs_ever_live_p (LR_REGNUM))
25147 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25148
25149 /* Clear all caller-saved regs that are not used to return. */
25150 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25151 cmse_nonsecure_entry_clear_before_return ();
25152 }
25153
25154 /* Epilogue code for APCS frame. */
25155 static void
25156 arm_expand_epilogue_apcs_frame (bool really_return)
25157 {
25158 unsigned long func_type;
25159 unsigned long saved_regs_mask;
25160 int num_regs = 0;
25161 int i;
25162 int floats_from_frame = 0;
25163 arm_stack_offsets *offsets;
25164
25165 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25166 func_type = arm_current_func_type ();
25167
25168 /* Get frame offsets for ARM. */
25169 offsets = arm_get_frame_offsets ();
25170 saved_regs_mask = offsets->saved_regs_mask;
25171
25172 /* Find the offset of the floating-point save area in the frame. */
25173 floats_from_frame
25174 = (offsets->saved_args
25175 + arm_compute_static_chain_stack_bytes ()
25176 - offsets->frame);
25177
25178 /* Compute how many core registers are saved and how far away the floats are. */
25179 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25180 if (saved_regs_mask & (1 << i))
25181 {
25182 num_regs++;
25183 floats_from_frame += 4;
25184 }
25185
25186 if (TARGET_HARD_FLOAT)
25187 {
25188 int start_reg;
25189 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25190
25191 /* The offset is from IP_REGNUM. */
25192 int saved_size = arm_get_vfp_saved_size ();
25193 if (saved_size > 0)
25194 {
25195 rtx_insn *insn;
25196 floats_from_frame += saved_size;
25197 insn = emit_insn (gen_addsi3 (ip_rtx,
25198 hard_frame_pointer_rtx,
25199 GEN_INT (-floats_from_frame)));
25200 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25201 ip_rtx, hard_frame_pointer_rtx);
25202 }
25203
25204 /* Generate VFP register multi-pop. */
25205 start_reg = FIRST_VFP_REGNUM;
25206
25207 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25208 /* Look for a case where a reg does not need restoring. */
25209 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25210 && (!df_regs_ever_live_p (i + 1)
25211 || call_used_regs[i + 1]))
25212 {
25213 if (start_reg != i)
25214 arm_emit_vfp_multi_reg_pop (start_reg,
25215 (i - start_reg) / 2,
25216 gen_rtx_REG (SImode,
25217 IP_REGNUM));
25218 start_reg = i + 2;
25219 }
25220
25221 /* Restore the remaining regs that we have discovered (or possibly
25222 even all of them, if the conditional in the for loop never
25223 fired). */
25224 if (start_reg != i)
25225 arm_emit_vfp_multi_reg_pop (start_reg,
25226 (i - start_reg) / 2,
25227 gen_rtx_REG (SImode, IP_REGNUM));
25228 }
25229
25230 if (TARGET_IWMMXT)
25231 {
25232 /* The frame pointer is guaranteed to be non-double-word aligned, as
25233 it is set to double-word-aligned old_stack_pointer - 4. */
25234 rtx_insn *insn;
25235 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25236
25237 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25238 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25239 {
25240 rtx addr = gen_frame_mem (V2SImode,
25241 plus_constant (Pmode, hard_frame_pointer_rtx,
25242 - lrm_count * 4));
25243 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25244 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25245 gen_rtx_REG (V2SImode, i),
25246 NULL_RTX);
25247 lrm_count += 2;
25248 }
25249 }
25250
25251 /* saved_regs_mask should contain IP, which holds the old stack pointer
25252 from the time the activation record was created. Since SP and IP are
25253 adjacent registers, we can restore the value directly into SP. */
25254 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25255 saved_regs_mask &= ~(1 << IP_REGNUM);
25256 saved_regs_mask |= (1 << SP_REGNUM);
25257
25258 /* There are two registers left in saved_regs_mask - LR and PC. We
25259 only need to restore LR (the return address), but to
25260 save time we can load it directly into PC, unless we need a
25261 special function exit sequence, or we are not really returning. */
25262 if (really_return
25263 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25264 && !crtl->calls_eh_return)
25265 /* Delete LR from the register mask, so that LR on
25266 the stack is loaded into the PC in the register mask. */
25267 saved_regs_mask &= ~(1 << LR_REGNUM);
25268 else
25269 saved_regs_mask &= ~(1 << PC_REGNUM);
25270
25271 num_regs = bit_count (saved_regs_mask);
25272 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25273 {
25274 rtx_insn *insn;
25275 emit_insn (gen_blockage ());
25276 /* Unwind the stack to just below the saved registers. */
25277 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25278 hard_frame_pointer_rtx,
25279 GEN_INT (- 4 * num_regs)));
25280
25281 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25282 stack_pointer_rtx, hard_frame_pointer_rtx);
25283 }
25284
25285 arm_emit_multi_reg_pop (saved_regs_mask);
25286
25287 if (IS_INTERRUPT (func_type))
25288 {
25289 /* Interrupt handlers will have pushed the
25290 IP onto the stack, so restore it now. */
25291 rtx_insn *insn;
25292 rtx addr = gen_rtx_MEM (SImode,
25293 gen_rtx_POST_INC (SImode,
25294 stack_pointer_rtx));
25295 set_mem_alias_set (addr, get_frame_alias_set ());
25296 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25297 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25298 gen_rtx_REG (SImode, IP_REGNUM),
25299 NULL_RTX);
25300 }
25301
25302 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25303 return;
25304
25305 if (crtl->calls_eh_return)
25306 emit_insn (gen_addsi3 (stack_pointer_rtx,
25307 stack_pointer_rtx,
25308 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25309
25310 if (IS_STACKALIGN (func_type))
25311 /* Restore the original stack pointer. Before prologue, the stack was
25312 realigned and the original stack pointer saved in r0. For details,
25313 see comment in arm_expand_prologue. */
25314 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25315
25316 emit_jump_insn (simple_return_rtx);
25317 }
25318
25319 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25320 function is not a sibcall. */
25321 void
25322 arm_expand_epilogue (bool really_return)
25323 {
25324 unsigned long func_type;
25325 unsigned long saved_regs_mask;
25326 int num_regs = 0;
25327 int i;
25328 int amount;
25329 arm_stack_offsets *offsets;
25330
25331 func_type = arm_current_func_type ();
25332
25333 /* Naked functions don't have epilogues. Hence, generate a return pattern and
25334 let output_return_instruction take care of any instruction emission. */
25335 if (IS_NAKED (func_type)
25336 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25337 {
25338 if (really_return)
25339 emit_jump_insn (simple_return_rtx);
25340 return;
25341 }
25342
25343 /* If we are throwing an exception, then we really must be doing a
25344 return, so we can't tail-call. */
25345 gcc_assert (!crtl->calls_eh_return || really_return);
25346
25347 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25348 {
25349 arm_expand_epilogue_apcs_frame (really_return);
25350 return;
25351 }
25352
25353 /* Get frame offsets for ARM. */
25354 offsets = arm_get_frame_offsets ();
25355 saved_regs_mask = offsets->saved_regs_mask;
25356 num_regs = bit_count (saved_regs_mask);
25357
25358 if (frame_pointer_needed)
25359 {
25360 rtx_insn *insn;
25361 /* Restore stack pointer if necessary. */
25362 if (TARGET_ARM)
25363 {
25364 /* In ARM mode, the frame pointer points to the first saved register.
25365 Restore the stack pointer so that it points to the last saved register. */
25366 amount = offsets->frame - offsets->saved_regs;
25367
25368 /* Force out any pending memory operations that reference stacked data
25369 before stack de-allocation occurs. */
25370 emit_insn (gen_blockage ());
25371 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25372 hard_frame_pointer_rtx,
25373 GEN_INT (amount)));
25374 arm_add_cfa_adjust_cfa_note (insn, amount,
25375 stack_pointer_rtx,
25376 hard_frame_pointer_rtx);
25377
25378 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25379 deleted. */
25380 emit_insn (gen_force_register_use (stack_pointer_rtx));
25381 }
25382 else
25383 {
25384 /* In Thumb-2 mode, the frame pointer points to the last saved
25385 register. */
25386 amount = offsets->locals_base - offsets->saved_regs;
25387 if (amount)
25388 {
25389 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25390 hard_frame_pointer_rtx,
25391 GEN_INT (amount)));
25392 arm_add_cfa_adjust_cfa_note (insn, amount,
25393 hard_frame_pointer_rtx,
25394 hard_frame_pointer_rtx);
25395 }
25396
25397 /* Force out any pending memory operations that reference stacked data
25398 before stack de-allocation occurs. */
25399 emit_insn (gen_blockage ());
25400 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25401 hard_frame_pointer_rtx));
25402 arm_add_cfa_adjust_cfa_note (insn, 0,
25403 stack_pointer_rtx,
25404 hard_frame_pointer_rtx);
25405 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25406 deleted. */
25407 emit_insn (gen_force_register_use (stack_pointer_rtx));
25408 }
25409 }
25410 else
25411 {
25412 /* Pop off outgoing args and local frame to adjust stack pointer to
25413 last saved register. */
25414 amount = offsets->outgoing_args - offsets->saved_regs;
25415 if (amount)
25416 {
25417 rtx_insn *tmp;
25418 /* Force out any pending memory operations that reference stacked data
25419 before stack de-allocation occurs. */
25420 emit_insn (gen_blockage ());
25421 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25422 stack_pointer_rtx,
25423 GEN_INT (amount)));
25424 arm_add_cfa_adjust_cfa_note (tmp, amount,
25425 stack_pointer_rtx, stack_pointer_rtx);
25426 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25427 not deleted. */
25428 emit_insn (gen_force_register_use (stack_pointer_rtx));
25429 }
25430 }
25431
25432 if (TARGET_HARD_FLOAT)
25433 {
25434 /* Generate VFP register multi-pop. */
25435 int end_reg = LAST_VFP_REGNUM + 1;
25436
25437 /* Scan the registers in reverse order. We need to match
25438 any groupings made in the prologue and generate matching
25439 vldm operations. The need to match groups is because,
25440 unlike pop, vldm can only do consecutive regs. */
25441 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25442 /* Look for a case where a reg does not need restoring. */
25443 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25444 && (!df_regs_ever_live_p (i + 1)
25445 || call_used_regs[i + 1]))
25446 {
25447 /* Restore the regs discovered so far (from reg+2 to
25448 end_reg). */
25449 if (end_reg > i + 2)
25450 arm_emit_vfp_multi_reg_pop (i + 2,
25451 (end_reg - (i + 2)) / 2,
25452 stack_pointer_rtx);
25453 end_reg = i;
25454 }
25455
25456 /* Restore the remaining regs that we have discovered (or possibly
25457 even all of them, if the conditional in the for loop never
25458 fired). */
25459 if (end_reg > i + 2)
25460 arm_emit_vfp_multi_reg_pop (i + 2,
25461 (end_reg - (i + 2)) / 2,
25462 stack_pointer_rtx);
25463 }
25464
25465 if (TARGET_IWMMXT)
25466 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25467 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25468 {
25469 rtx_insn *insn;
25470 rtx addr = gen_rtx_MEM (V2SImode,
25471 gen_rtx_POST_INC (SImode,
25472 stack_pointer_rtx));
25473 set_mem_alias_set (addr, get_frame_alias_set ());
25474 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25475 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25476 gen_rtx_REG (V2SImode, i),
25477 NULL_RTX);
25478 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25479 stack_pointer_rtx, stack_pointer_rtx);
25480 }
25481
25482 if (saved_regs_mask)
25483 {
25484 rtx insn;
25485 bool return_in_pc = false;
25486
25487 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25488 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25489 && !IS_CMSE_ENTRY (func_type)
25490 && !IS_STACKALIGN (func_type)
25491 && really_return
25492 && crtl->args.pretend_args_size == 0
25493 && saved_regs_mask & (1 << LR_REGNUM)
25494 && !crtl->calls_eh_return)
25495 {
25496 saved_regs_mask &= ~(1 << LR_REGNUM);
25497 saved_regs_mask |= (1 << PC_REGNUM);
25498 return_in_pc = true;
25499 }
25500
25501 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25502 {
25503 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25504 if (saved_regs_mask & (1 << i))
25505 {
25506 rtx addr = gen_rtx_MEM (SImode,
25507 gen_rtx_POST_INC (SImode,
25508 stack_pointer_rtx));
25509 set_mem_alias_set (addr, get_frame_alias_set ());
25510
25511 if (i == PC_REGNUM)
25512 {
25513 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25514 XVECEXP (insn, 0, 0) = ret_rtx;
25515 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25516 addr);
25517 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25518 insn = emit_jump_insn (insn);
25519 }
25520 else
25521 {
25522 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25523 addr));
25524 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25525 gen_rtx_REG (SImode, i),
25526 NULL_RTX);
25527 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25528 stack_pointer_rtx,
25529 stack_pointer_rtx);
25530 }
25531 }
25532 }
25533 else
25534 {
25535 if (TARGET_LDRD
25536 && current_tune->prefer_ldrd_strd
25537 && !optimize_function_for_size_p (cfun))
25538 {
25539 if (TARGET_THUMB2)
25540 thumb2_emit_ldrd_pop (saved_regs_mask);
25541 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25542 arm_emit_ldrd_pop (saved_regs_mask);
25543 else
25544 arm_emit_multi_reg_pop (saved_regs_mask);
25545 }
25546 else
25547 arm_emit_multi_reg_pop (saved_regs_mask);
25548 }
25549
25550 if (return_in_pc)
25551 return;
25552 }
25553
25554 amount
25555 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25556 if (amount)
25557 {
25558 int i, j;
25559 rtx dwarf = NULL_RTX;
25560 rtx_insn *tmp =
25561 emit_insn (gen_addsi3 (stack_pointer_rtx,
25562 stack_pointer_rtx,
25563 GEN_INT (amount)));
25564
25565 RTX_FRAME_RELATED_P (tmp) = 1;
25566
25567 if (cfun->machine->uses_anonymous_args)
25568 {
25569 /* Restore pretend args. See arm_expand_prologue for how the
25570 pretend args are saved on the stack. */
25571 int num_regs = crtl->args.pretend_args_size / 4;
25572 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25573 for (j = 0, i = 0; j < num_regs; i++)
25574 if (saved_regs_mask & (1 << i))
25575 {
25576 rtx reg = gen_rtx_REG (SImode, i);
25577 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25578 j++;
25579 }
25580 REG_NOTES (tmp) = dwarf;
25581 }
25582 arm_add_cfa_adjust_cfa_note (tmp, amount,
25583 stack_pointer_rtx, stack_pointer_rtx);
25584 }
25585
25586 /* Clear all caller-saved regs that are not used to return. */
25587 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25588 {
25589 /* CMSE_ENTRY always returns. */
25590 gcc_assert (really_return);
25591 cmse_nonsecure_entry_clear_before_return ();
25592 }
25593
25594 if (!really_return)
25595 return;
25596
25597 if (crtl->calls_eh_return)
25598 emit_insn (gen_addsi3 (stack_pointer_rtx,
25599 stack_pointer_rtx,
25600 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25601
25602 if (IS_STACKALIGN (func_type))
25603 /* Restore the original stack pointer. Before prologue, the stack was
25604 realigned and the original stack pointer saved in r0. For details,
25605 see comment in arm_expand_prologue. */
25606 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25607
25608 emit_jump_insn (simple_return_rtx);
25609 }
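
/* Worked example for the pretend-args restore above (illustrative only):
   a varargs function with one named register argument has
   crtl->args.pretend_args_size == 12, so num_regs == 3 and
   saved_regs_mask == (0xf0 >> 3) & 0xf == 0xe, i.e. the prologue stored
   r1, r2 and r3 above the saved registers, and the REG_CFA_RESTORE notes
   generated here cover exactly those three registers.  */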
25610
25611 /* Implementation of insn prologue_thumb1_interwork. This is the first
25612 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25613
25614 const char *
25615 thumb1_output_interwork (void)
25616 {
25617 const char * name;
25618 FILE *f = asm_out_file;
25619
25620 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25621 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25622 == SYMBOL_REF);
25623 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25624
25625 /* Generate code sequence to switch us into Thumb mode. */
25626 /* The .code 32 directive has already been emitted by
25627 ASM_DECLARE_FUNCTION_NAME. */
25628 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25629 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25630
25631 /* Generate a label, so that the debugger will notice the
25632 change in instruction sets. This label is also used by
25633 the assembler to bypass the ARM code when this function
25634 is called from a Thumb encoded function elsewhere in the
25635 same file. Hence the definition of STUB_NAME here must
25636 agree with the definition in gas/config/tc-arm.c. */
25637
25638 #define STUB_NAME ".real_start_of"
25639
25640 fprintf (f, "\t.code\t16\n");
25641 #ifdef ARM_PE
25642 if (arm_dllexport_name_p (name))
25643 name = arm_strip_name_encoding (name);
25644 #endif
25645 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25646 fprintf (f, "\t.thumb_func\n");
25647 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25648
25649 return "";
25650 }
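
/* Illustrative effect of the sequence emitted above: the two ARM
   instructions

       orr   ip, pc, #1
       bx    ip

   compute the address of the following ".code 16" stub label with the
   Thumb bit set and branch to it, so execution continues in Thumb state
   at the real start of the function.  */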
25651
25652 /* Handle the case of a double word load into a low register from
25653 a computed memory address. The computed address may involve a
25654 register which is overwritten by the load. */
25655 const char *
25656 thumb_load_double_from_address (rtx *operands)
25657 {
25658 rtx addr;
25659 rtx base;
25660 rtx offset;
25661 rtx arg1;
25662 rtx arg2;
25663
25664 gcc_assert (REG_P (operands[0]));
25665 gcc_assert (MEM_P (operands[1]));
25666
25667 /* Get the memory address. */
25668 addr = XEXP (operands[1], 0);
25669
25670 /* Work out how the memory address is computed. */
25671 switch (GET_CODE (addr))
25672 {
25673 case REG:
25674 operands[2] = adjust_address (operands[1], SImode, 4);
25675
25676 if (REGNO (operands[0]) == REGNO (addr))
25677 {
25678 output_asm_insn ("ldr\t%H0, %2", operands);
25679 output_asm_insn ("ldr\t%0, %1", operands);
25680 }
25681 else
25682 {
25683 output_asm_insn ("ldr\t%0, %1", operands);
25684 output_asm_insn ("ldr\t%H0, %2", operands);
25685 }
25686 break;
25687
25688 case CONST:
25689 /* Compute <address> + 4 for the high order load. */
25690 operands[2] = adjust_address (operands[1], SImode, 4);
25691
25692 output_asm_insn ("ldr\t%0, %1", operands);
25693 output_asm_insn ("ldr\t%H0, %2", operands);
25694 break;
25695
25696 case PLUS:
25697 arg1 = XEXP (addr, 0);
25698 arg2 = XEXP (addr, 1);
25699
25700 if (CONSTANT_P (arg1))
25701 base = arg2, offset = arg1;
25702 else
25703 base = arg1, offset = arg2;
25704
25705 gcc_assert (REG_P (base));
25706
25707 /* Catch the case of <address> = <reg> + <reg> */
25708 if (REG_P (offset))
25709 {
25710 int reg_offset = REGNO (offset);
25711 int reg_base = REGNO (base);
25712 int reg_dest = REGNO (operands[0]);
25713
25714 /* Add the base and offset registers together into the
25715 higher destination register. */
25716 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25717 reg_dest + 1, reg_base, reg_offset);
25718
25719 /* Load the lower destination register from the address in
25720 the higher destination register. */
25721 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25722 reg_dest, reg_dest + 1);
25723
25724 /* Load the higher destination register from its own address
25725 plus 4. */
25726 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25727 reg_dest + 1, reg_dest + 1);
25728 }
25729 else
25730 {
25731 /* Compute <address> + 4 for the high order load. */
25732 operands[2] = adjust_address (operands[1], SImode, 4);
25733
25734 /* If the computed address is held in the low order register
25735 then load the high order register first, otherwise always
25736 load the low order register first. */
25737 if (REGNO (operands[0]) == REGNO (base))
25738 {
25739 output_asm_insn ("ldr\t%H0, %2", operands);
25740 output_asm_insn ("ldr\t%0, %1", operands);
25741 }
25742 else
25743 {
25744 output_asm_insn ("ldr\t%0, %1", operands);
25745 output_asm_insn ("ldr\t%H0, %2", operands);
25746 }
25747 }
25748 break;
25749
25750 case LABEL_REF:
25751 /* With no registers to worry about we can just load the value
25752 directly. */
25753 operands[2] = adjust_address (operands[1], SImode, 4);
25754
25755 output_asm_insn ("ldr\t%H0, %2", operands);
25756 output_asm_insn ("ldr\t%0, %1", operands);
25757 break;
25758
25759 default:
25760 gcc_unreachable ();
25761 }
25762
25763 return "";
25764 }
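
/* Illustrative example (hypothetical operands): loading a DImode value
   from [r2] into the register pair r2/r3 cannot start with the low word,
   since "ldr r2, [r2]" would clobber the base address.  The REG case
   above therefore emits "ldr r3, [r2, #4]" first and "ldr r2, [r2]"
   second; when the destination does not overlap the base, the low word
   is loaded first as usual.  */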
25765
25766 const char *
25767 thumb_output_move_mem_multiple (int n, rtx *operands)
25768 {
25769 switch (n)
25770 {
25771 case 2:
25772 if (REGNO (operands[4]) > REGNO (operands[5]))
25773 std::swap (operands[4], operands[5]);
25774
25775 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25776 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25777 break;
25778
25779 case 3:
25780 if (REGNO (operands[4]) > REGNO (operands[5]))
25781 std::swap (operands[4], operands[5]);
25782 if (REGNO (operands[5]) > REGNO (operands[6]))
25783 std::swap (operands[5], operands[6]);
25784 if (REGNO (operands[4]) > REGNO (operands[5]))
25785 std::swap (operands[4], operands[5]);
25786
25787 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25788 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25789 break;
25790
25791 default:
25792 gcc_unreachable ();
25793 }
25794
25795 return "";
25796 }
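
/* Illustrative example (hypothetical registers): with scratch registers
   r5, r7, r6 in operands[4..6], the three conditional swaps above sort
   them into r5, r6, r7, so the register lists in the ldmia/stmia pair
   emitted above are in ascending register order, matching the bitmask
   encoding and transfer order of the load/store-multiple instructions.  */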
25797
25798 /* Output a call-via instruction for thumb state. */
25799 const char *
25800 thumb_call_via_reg (rtx reg)
25801 {
25802 int regno = REGNO (reg);
25803 rtx *labelp;
25804
25805 gcc_assert (regno < LR_REGNUM);
25806
25807 /* If we are in the normal text section we can use a single instance
25808 per compilation unit. If we are doing function sections, then we need
25809 an entry per section, since we can't rely on reachability. */
25810 if (in_section == text_section)
25811 {
25812 thumb_call_reg_needed = 1;
25813
25814 if (thumb_call_via_label[regno] == NULL)
25815 thumb_call_via_label[regno] = gen_label_rtx ();
25816 labelp = thumb_call_via_label + regno;
25817 }
25818 else
25819 {
25820 if (cfun->machine->call_via[regno] == NULL)
25821 cfun->machine->call_via[regno] = gen_label_rtx ();
25822 labelp = cfun->machine->call_via + regno;
25823 }
25824
25825 output_asm_insn ("bl\t%a0", labelp);
25826 return "";
25827 }
25828
25829 /* Routines for generating rtl. */
25830 void
25831 thumb_expand_movmemqi (rtx *operands)
25832 {
25833 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25834 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25835 HOST_WIDE_INT len = INTVAL (operands[2]);
25836 HOST_WIDE_INT offset = 0;
25837
25838 while (len >= 12)
25839 {
25840 emit_insn (gen_movmem12b (out, in, out, in));
25841 len -= 12;
25842 }
25843
25844 if (len >= 8)
25845 {
25846 emit_insn (gen_movmem8b (out, in, out, in));
25847 len -= 8;
25848 }
25849
25850 if (len >= 4)
25851 {
25852 rtx reg = gen_reg_rtx (SImode);
25853 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25854 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25855 len -= 4;
25856 offset += 4;
25857 }
25858
25859 if (len >= 2)
25860 {
25861 rtx reg = gen_reg_rtx (HImode);
25862 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25863 plus_constant (Pmode, in,
25864 offset))));
25865 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25866 offset)),
25867 reg));
25868 len -= 2;
25869 offset += 2;
25870 }
25871
25872 if (len)
25873 {
25874 rtx reg = gen_reg_rtx (QImode);
25875 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25876 plus_constant (Pmode, in,
25877 offset))));
25878 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25879 offset)),
25880 reg));
25881 }
25882 }
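
/* Worked example (illustrative only): a 27-byte copy is decomposed by
   the code above into two movmem12b blocks (24 bytes, with the pointer
   registers updated by the patterns themselves); the SImode word copy is
   then skipped (only 3 bytes remain), leaving an HImode copy at offset 0
   and a final QImode copy at offset 2.  */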
25883
25884 void
25885 thumb_reload_out_hi (rtx *operands)
25886 {
25887 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25888 }
25889
25890 /* Return the length of a function name prefix
25891 that starts with the character 'c'. */
25892 static int
25893 arm_get_strip_length (int c)
25894 {
25895 switch (c)
25896 {
25897 ARM_NAME_ENCODING_LENGTHS
25898 default: return 0;
25899 }
25900 }
25901
25902 /* Return a pointer to a function's name with any
25903 and all prefix encodings stripped from it. */
25904 const char *
25905 arm_strip_name_encoding (const char *name)
25906 {
25907 int skip;
25908
25909 while ((skip = arm_get_strip_length (* name)))
25910 name += skip;
25911
25912 return name;
25913 }
25914
25915 /* If there is a '*' anywhere in the name's prefix, then
25916 emit the stripped name verbatim, otherwise prepend an
25917 underscore if leading underscores are being used. */
25918 void
25919 arm_asm_output_labelref (FILE *stream, const char *name)
25920 {
25921 int skip;
25922 int verbatim = 0;
25923
25924 while ((skip = arm_get_strip_length (* name)))
25925 {
25926 verbatim |= (*name == '*');
25927 name += skip;
25928 }
25929
25930 if (verbatim)
25931 fputs (name, stream);
25932 else
25933 asm_fprintf (stream, "%U%s", name);
25934 }
25935
25936 /* This function is used to emit an EABI tag and its associated value.
25937 We emit the numerical value of the tag in case the assembler does not
25938 support textual tags (e.g. gas prior to 2.20). If requested we include
25939 the tag name in a comment so that anyone reading the assembler output
25940 will know which tag is being set.
25941
25942 This function is not static because arm-c.c needs it too. */
25943
25944 void
25945 arm_emit_eabi_attribute (const char *name, int num, int val)
25946 {
25947 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25948 if (flag_verbose_asm || flag_debug_asm)
25949 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25950 asm_fprintf (asm_out_file, "\n");
25951 }
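
/* Illustrative output: a call such as
   arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2) produces roughly

       .eabi_attribute 26, 2   @ Tag_ABI_enum_size

   with the trailing comment only present under -fverbose-asm or -dA
   (ASM_COMMENT_START is "@" on ARM).  */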
25952
25953 /* This function is used to print CPU tuning information as a comment
25954 in the assembler file. Pointers are not printed for now. */
25955
25956 void
25957 arm_print_tune_info (void)
25958 {
25959 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25960 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25961 current_tune->constant_limit);
25962 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25963 current_tune->max_insns_skipped);
25964 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25965 current_tune->prefetch.num_slots);
25966 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25967 current_tune->prefetch.l1_cache_size);
25968 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25969 current_tune->prefetch.l1_cache_line_size);
25970 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25971 (int) current_tune->prefer_constant_pool);
25972 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25973 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25974 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25975 current_tune->branch_cost (false, false));
25976 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25977 current_tune->branch_cost (false, true));
25978 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25979 current_tune->branch_cost (true, false));
25980 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25981 current_tune->branch_cost (true, true));
25982 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25983 (int) current_tune->prefer_ldrd_strd);
25984 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25985 (int) current_tune->logical_op_non_short_circuit_thumb,
25986 (int) current_tune->logical_op_non_short_circuit_arm);
25987 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25988 (int) current_tune->prefer_neon_for_64bits);
25989 asm_fprintf (asm_out_file,
25990 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25991 (int) current_tune->disparage_flag_setting_t16_encodings);
25992 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25993 (int) current_tune->string_ops_prefer_neon);
25994 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25995 current_tune->max_insns_inline_memset);
25996 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25997 current_tune->fusible_ops);
25998 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25999 (int) current_tune->sched_autopref);
26000 }
26001
26002 static void
26003 arm_file_start (void)
26004 {
26005 int val;
26006
26007 if (TARGET_BPABI)
26008 {
26009 /* We don't have a specified CPU. Use the architecture to
26010 generate the tags.
26011
26012 Note: it might be better to do this unconditionally, then the
26013 assembler would not need to know about all new CPU names as
26014 they are added. */
26015 if (!arm_active_target.core_name)
26016 {
26017 /* armv7ve doesn't support any extensions. */
26018 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26019 {
26020 /* Keep backward compatibility with assemblers
26021 which don't support armv7ve. */
26022 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26023 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26024 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26025 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26026 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26027 }
26028 else
26029 {
26030 const char* pos = strchr (arm_active_target.arch_name, '+');
26031 if (pos)
26032 {
26033 char buf[32];
26034 gcc_assert (strlen (arm_active_target.arch_name)
26035 <= sizeof (buf) / sizeof (*pos));
26036 strncpy (buf, arm_active_target.arch_name,
26037 (pos - arm_active_target.arch_name) * sizeof (*pos));
26038 buf[pos - arm_active_target.arch_name] = '\0';
26039 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26040 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26041 }
26042 else
26043 asm_fprintf (asm_out_file, "\t.arch %s\n",
26044 arm_active_target.arch_name);
26045 }
26046 }
26047 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26048 asm_fprintf (asm_out_file, "\t.arch %s\n",
26049 arm_active_target.core_name + 8);
26050 else
26051 {
26052 const char* truncated_name
26053 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26054 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26055 }
26056
26057 if (print_tune_info)
26058 arm_print_tune_info ();
26059
26060 if (! TARGET_SOFT_FLOAT)
26061 {
26062 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26063 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26064
26065 if (TARGET_HARD_FLOAT_ABI)
26066 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26067 }
26068
26069 /* Some of these attributes only apply when the corresponding features
26070 are used. However we don't have any easy way of figuring this out.
26071 Conservatively record the setting that would have been used. */
26072
26073 if (flag_rounding_math)
26074 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26075
26076 if (!flag_unsafe_math_optimizations)
26077 {
26078 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26079 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26080 }
26081 if (flag_signaling_nans)
26082 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26083
26084 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26085 flag_finite_math_only ? 1 : 3);
26086
26087 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26088 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26089 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26090 flag_short_enums ? 1 : 2);
26091
26092 /* Tag_ABI_optimization_goals. */
26093 if (optimize_size)
26094 val = 4;
26095 else if (optimize >= 2)
26096 val = 2;
26097 else if (optimize)
26098 val = 1;
26099 else
26100 val = 6;
26101 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26102
26103 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26104 unaligned_access);
26105
26106 if (arm_fp16_format)
26107 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26108 (int) arm_fp16_format);
26109
26110 if (arm_lang_output_object_attributes_hook)
26111 arm_lang_output_object_attributes_hook();
26112 }
26113
26114 default_file_start ();
26115 }
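
/* Illustrative example for the BPABI architecture handling above
   (hypothetical options): with no CPU specified and an architecture name
   of "armv8-a+crc", the name is split at the '+' and emitted as

       .arch armv8-a
       .arch_extension crc

   whereas a name without extensions becomes a single ".arch" directive,
   and "armv7ve" falls back to ".arch armv7-a" plus explicit
   .arch_extension lines for older assemblers.  */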
26116
26117 static void
26118 arm_file_end (void)
26119 {
26120 int regno;
26121
26122 if (NEED_INDICATE_EXEC_STACK)
26123 /* Add .note.GNU-stack. */
26124 file_end_indicate_exec_stack ();
26125
26126 if (! thumb_call_reg_needed)
26127 return;
26128
26129 switch_to_section (text_section);
26130 asm_fprintf (asm_out_file, "\t.code 16\n");
26131 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26132
26133 for (regno = 0; regno < LR_REGNUM; regno++)
26134 {
26135 rtx label = thumb_call_via_label[regno];
26136
26137 if (label != 0)
26138 {
26139 targetm.asm_out.internal_label (asm_out_file, "L",
26140 CODE_LABEL_NUMBER (label));
26141 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26142 }
26143 }
26144 }
26145
26146 #ifndef ARM_PE
26147 /* Symbols in the text segment can be accessed without indirecting via the
26148 constant pool; it may take an extra binary operation, but this is still
26149 faster than indirecting via memory. Don't do this when not optimizing,
26150 since we won't be calculating all of the offsets necessary to do this
26151 simplification. */
26152
26153 static void
26154 arm_encode_section_info (tree decl, rtx rtl, int first)
26155 {
26156 if (optimize > 0 && TREE_CONSTANT (decl))
26157 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26158
26159 default_encode_section_info (decl, rtl, first);
26160 }
26161 #endif /* !ARM_PE */
26162
26163 static void
26164 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26165 {
26166 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26167 && !strcmp (prefix, "L"))
26168 {
26169 arm_ccfsm_state = 0;
26170 arm_target_insn = NULL;
26171 }
26172 default_internal_label (stream, prefix, labelno);
26173 }
26174
26175 /* Output code to add DELTA to the first argument, and then jump
26176 to FUNCTION. Used for C++ multiple inheritance. */
26177
26178 static void
26179 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26180 HOST_WIDE_INT, tree function)
26181 {
26182 static int thunk_label = 0;
26183 char label[256];
26184 char labelpc[256];
26185 int mi_delta = delta;
26186 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26187 int shift = 0;
26188 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26189 ? 1 : 0);
26190 if (mi_delta < 0)
26191 mi_delta = - mi_delta;
26192
26193 final_start_function (emit_barrier (), file, 1);
26194
26195 if (TARGET_THUMB1)
26196 {
26197 int labelno = thunk_label++;
26198 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26199 /* Thunks are entered in ARM mode when available. */
26200 if (TARGET_THUMB1_ONLY)
26201 {
26202 /* push r3 so we can use it as a temporary. */
26203 /* TODO: Omit this save if r3 is not used. */
26204 fputs ("\tpush {r3}\n", file);
26205 fputs ("\tldr\tr3, ", file);
26206 }
26207 else
26208 {
26209 fputs ("\tldr\tr12, ", file);
26210 }
26211 assemble_name (file, label);
26212 fputc ('\n', file);
26213 if (flag_pic)
26214 {
26215 /* If we are generating PIC, the ldr instruction below loads
26216 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26217 the address of the add + 8, so we have:
26218
26219 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26220 = target + 1.
26221
26222 Note that we have "+ 1" because some versions of GNU ld
26223 don't set the low bit of the result for R_ARM_REL32
26224 relocations against thumb function symbols.
26225 On ARMv6M this is +4, not +8. */
26226 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26227 assemble_name (file, labelpc);
26228 fputs (":\n", file);
26229 if (TARGET_THUMB1_ONLY)
26230 {
26231 /* This is 2 insns after the start of the thunk, so we know it
26232 is 4-byte aligned. */
26233 fputs ("\tadd\tr3, pc, r3\n", file);
26234 fputs ("\tmov r12, r3\n", file);
26235 }
26236 else
26237 fputs ("\tadd\tr12, pc, r12\n", file);
26238 }
26239 else if (TARGET_THUMB1_ONLY)
26240 fputs ("\tmov r12, r3\n", file);
26241 }
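
/* Worked example of the PIC address arithmetic described above, with
   made-up addresses: if the "add" labelled .LTHUNKPCn sits at 0x8000,
   the ARM-state pc reads as 0x8008 there and the literal word emitted at
   the end of the thunk holds (target - 7) - 0x8000, so the add produces
   (target - 7) - 0x8000 + 0x8008 = target + 1, i.e. the target address
   with the Thumb bit set.  On a Thumb-only (ARMv6-M style) thunk the
   literal is target - 3 and pc reads as the add + 4, giving the same
   result.  */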
26242 if (TARGET_THUMB1_ONLY)
26243 {
26244 if (mi_delta > 255)
26245 {
26246 fputs ("\tldr\tr3, ", file);
26247 assemble_name (file, label);
26248 fputs ("+4\n", file);
26249 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26250 mi_op, this_regno, this_regno);
26251 }
26252 else if (mi_delta != 0)
26253 {
26254 /* Thumb1 unified syntax requires s suffix in instruction name when
26255 one of the operands is immediate. */
26256 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26257 mi_op, this_regno, this_regno,
26258 mi_delta);
26259 }
26260 }
26261 else
26262 {
26263 /* TODO: Use movw/movt for large constants when available. */
26264 while (mi_delta != 0)
26265 {
26266 if ((mi_delta & (3 << shift)) == 0)
26267 shift += 2;
26268 else
26269 {
26270 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26271 mi_op, this_regno, this_regno,
26272 mi_delta & (0xff << shift));
26273 mi_delta &= ~(0xff << shift);
26274 shift += 8;
26275 }
26276 }
26277 }
26278 if (TARGET_THUMB1)
26279 {
26280 if (TARGET_THUMB1_ONLY)
26281 fputs ("\tpop\t{r3}\n", file);
26282
26283 fprintf (file, "\tbx\tr12\n");
26284 ASM_OUTPUT_ALIGN (file, 2);
26285 assemble_name (file, label);
26286 fputs (":\n", file);
26287 if (flag_pic)
26288 {
26289 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26290 rtx tem = XEXP (DECL_RTL (function), 0);
26291 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26292 pipeline offset is four rather than eight. Adjust the offset
26293 accordingly. */
26294 tem = plus_constant (GET_MODE (tem), tem,
26295 TARGET_THUMB1_ONLY ? -3 : -7);
26296 tem = gen_rtx_MINUS (GET_MODE (tem),
26297 tem,
26298 gen_rtx_SYMBOL_REF (Pmode,
26299 ggc_strdup (labelpc)));
26300 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26301 }
26302 else
26303 /* Output ".word .LTHUNKn". */
26304 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26305
26306 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26307 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26308 }
26309 else
26310 {
26311 fputs ("\tb\t", file);
26312 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26313 if (NEED_PLT_RELOC)
26314 fputs ("(PLT)", file);
26315 fputc ('\n', file);
26316 }
26317
26318 final_end_function ();
26319 }
26320
26321 /* MI thunk handling for TARGET_32BIT. */
26322
26323 static void
26324 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26325 HOST_WIDE_INT vcall_offset, tree function)
26326 {
26327 /* On ARM, this_regno is R0 or R1 depending on
26328 whether the function returns an aggregate or not. */
26330 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26331 function)
26332 ? R1_REGNUM : R0_REGNUM);
26333
26334 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26335 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26336 reload_completed = 1;
26337 emit_note (NOTE_INSN_PROLOGUE_END);
26338
26339 /* Add DELTA to THIS_RTX. */
26340 if (delta != 0)
26341 arm_split_constant (PLUS, Pmode, NULL_RTX,
26342 delta, this_rtx, this_rtx, false);
26343
26344 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26345 if (vcall_offset != 0)
26346 {
26347 /* Load *THIS_RTX. */
26348 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26349 /* Compute *THIS_RTX + VCALL_OFFSET. */
26350 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26351 false);
26352 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26353 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26354 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26355 }
26356
26357 /* Generate a tail call to the target function. */
26358 if (!TREE_USED (function))
26359 {
26360 assemble_external (function);
26361 TREE_USED (function) = 1;
26362 }
26363 rtx funexp = XEXP (DECL_RTL (function), 0);
26364 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26365 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26366 SIBLING_CALL_P (insn) = 1;
26367
26368 insn = get_insns ();
26369 shorten_branches (insn);
26370 final_start_function (insn, file, 1);
26371 final (insn, file, 1);
26372 final_end_function ();
26373
26374 /* Stop pretending this is a post-reload pass. */
26375 reload_completed = 0;
26376 }
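
/* A minimal sketch of what the generated thunk does, in C-like pseudo
   code (pointer types simplified; not part of the compiler):

       this += delta;
       if (vcall_offset)
         this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
       goto function;   /* emitted as a sibling call */

   which matches the delta / vcall_offset handling in
   arm32_output_mi_thunk above.  */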
26377
26378 /* Output code to add DELTA to the first argument, and then jump
26379 to FUNCTION. Used for C++ multiple inheritance. */
26380
26381 static void
26382 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26383 HOST_WIDE_INT vcall_offset, tree function)
26384 {
26385 if (TARGET_32BIT)
26386 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26387 else
26388 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26389 }
26390
26391 int
26392 arm_emit_vector_const (FILE *file, rtx x)
26393 {
26394 int i;
26395 const char * pattern;
26396
26397 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26398
26399 switch (GET_MODE (x))
26400 {
26401 case V2SImode: pattern = "%08x"; break;
26402 case V4HImode: pattern = "%04x"; break;
26403 case V8QImode: pattern = "%02x"; break;
26404 default: gcc_unreachable ();
26405 }
26406
26407 fprintf (file, "0x");
26408 for (i = CONST_VECTOR_NUNITS (x); i--;)
26409 {
26410 rtx element;
26411
26412 element = CONST_VECTOR_ELT (x, i);
26413 fprintf (file, pattern, INTVAL (element));
26414 }
26415
26416 return 1;
26417 }
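
/* Illustrative example (hypothetical constant): a V4HImode vector
   { 1, 2, 3, 4 } is printed by the loop above from the highest element
   down, giving "0x0004000300020001"; the per-element width comes from
   the "%04x" pattern selected for V4HImode.  */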
26418
26419 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26420 HFmode constant pool entries are actually loaded with ldr. */
26421 void
26422 arm_emit_fp16_const (rtx c)
26423 {
26424 long bits;
26425
26426 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26427 if (WORDS_BIG_ENDIAN)
26428 assemble_zeros (2);
26429 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26430 if (!WORDS_BIG_ENDIAN)
26431 assemble_zeros (2);
26432 }
26433
26434 const char *
26435 arm_output_load_gr (rtx *operands)
26436 {
26437 rtx reg;
26438 rtx offset;
26439 rtx wcgr;
26440 rtx sum;
26441
26442 if (!MEM_P (operands [1])
26443 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26444 || !REG_P (reg = XEXP (sum, 0))
26445 || !CONST_INT_P (offset = XEXP (sum, 1))
26446 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26447 return "wldrw%?\t%0, %1";
26448
26449 /* Fix up an out-of-range load of a GR register. */
26450 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26451 wcgr = operands[0];
26452 operands[0] = reg;
26453 output_asm_insn ("ldr%?\t%0, %1", operands);
26454
26455 operands[0] = wcgr;
26456 operands[1] = reg;
26457 output_asm_insn ("tmcr%?\t%0, %1", operands);
26458 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26459
26460 return "";
26461 }
26462
26463 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26464
26465 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26466 named arg and all anonymous args onto the stack.
26467 XXX I know the prologue shouldn't be pushing registers, but it is faster
26468 that way. */
26469
26470 static void
26471 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26472 machine_mode mode,
26473 tree type,
26474 int *pretend_size,
26475 int second_time ATTRIBUTE_UNUSED)
26476 {
26477 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26478 int nregs;
26479
26480 cfun->machine->uses_anonymous_args = 1;
26481 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26482 {
26483 nregs = pcum->aapcs_ncrn;
26484 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26485 nregs++;
26486 }
26487 else
26488 nregs = pcum->nregs;
26489
26490 if (nregs < NUM_ARG_REGS)
26491 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26492 }
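
/* Worked example (illustrative only): for a variadic AAPCS function such
   as "int f (int a, int b, ...)", the two named arguments occupy r0 and
   r1, so nregs == 2 and *pretend_size becomes (4 - 2) * 4 == 8, telling
   the prologue to push r2 and r3 immediately below the arguments the
   caller passed on the stack, keeping all anonymous arguments
   contiguous.  */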
26493
26494 /* We can't rely on the caller doing the proper promotion when
26495 using APCS or ATPCS. */
26496
26497 static bool
26498 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26499 {
26500 return !TARGET_AAPCS_BASED;
26501 }
26502
26503 static machine_mode
26504 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26505 machine_mode mode,
26506 int *punsignedp ATTRIBUTE_UNUSED,
26507 const_tree fntype ATTRIBUTE_UNUSED,
26508 int for_return ATTRIBUTE_UNUSED)
26509 {
26510 if (GET_MODE_CLASS (mode) == MODE_INT
26511 && GET_MODE_SIZE (mode) < 4)
26512 return SImode;
26513
26514 return mode;
26515 }
26516
26517 /* AAPCS based ABIs use short enums by default. */
26518
26519 static bool
26520 arm_default_short_enums (void)
26521 {
26522 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26523 }
26524
26525
26526 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26527
26528 static bool
26529 arm_align_anon_bitfield (void)
26530 {
26531 return TARGET_AAPCS_BASED;
26532 }
26533
26534
26535 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26536
26537 static tree
26538 arm_cxx_guard_type (void)
26539 {
26540 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26541 }
26542
26543
26544 /* The EABI says test the least significant bit of a guard variable. */
26545
26546 static bool
26547 arm_cxx_guard_mask_bit (void)
26548 {
26549 return TARGET_AAPCS_BASED;
26550 }
26551
26552
26553 /* The EABI specifies that all array cookies are 8 bytes long. */
26554
26555 static tree
26556 arm_get_cookie_size (tree type)
26557 {
26558 tree size;
26559
26560 if (!TARGET_AAPCS_BASED)
26561 return default_cxx_get_cookie_size (type);
26562
26563 size = build_int_cst (sizetype, 8);
26564 return size;
26565 }
26566
26567
26568 /* The EABI says that array cookies should also contain the element size. */
26569
26570 static bool
26571 arm_cookie_has_size (void)
26572 {
26573 return TARGET_AAPCS_BASED;
26574 }
26575
26576
26577 /* The EABI says constructors and destructors should return a pointer to
26578 the object constructed/destroyed. */
26579
26580 static bool
26581 arm_cxx_cdtor_returns_this (void)
26582 {
26583 return TARGET_AAPCS_BASED;
26584 }
26585
26586 /* The EABI says that an inline function may never be the key
26587 method. */
26588
26589 static bool
26590 arm_cxx_key_method_may_be_inline (void)
26591 {
26592 return !TARGET_AAPCS_BASED;
26593 }
26594
26595 static void
26596 arm_cxx_determine_class_data_visibility (tree decl)
26597 {
26598 if (!TARGET_AAPCS_BASED
26599 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26600 return;
26601
26602 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26603 is exported. However, on systems without dynamic vague linkage,
26604 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26605 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26606 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26607 else
26608 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26609 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26610 }
26611
26612 static bool
26613 arm_cxx_class_data_always_comdat (void)
26614 {
26615 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26616 vague linkage if the class has no key function. */
26617 return !TARGET_AAPCS_BASED;
26618 }
26619
26620
26621 /* The EABI says __aeabi_atexit should be used to register static
26622 destructors. */
26623
26624 static bool
26625 arm_cxx_use_aeabi_atexit (void)
26626 {
26627 return TARGET_AAPCS_BASED;
26628 }
26629
26630
26631 void
26632 arm_set_return_address (rtx source, rtx scratch)
26633 {
26634 arm_stack_offsets *offsets;
26635 HOST_WIDE_INT delta;
26636 rtx addr;
26637 unsigned long saved_regs;
26638
26639 offsets = arm_get_frame_offsets ();
26640 saved_regs = offsets->saved_regs_mask;
26641
26642 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26643 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26644 else
26645 {
26646 if (frame_pointer_needed)
26647 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26648 else
26649 {
26650 /* LR will be the first saved register. */
26651 delta = offsets->outgoing_args - (offsets->frame + 4);
26652
26654 if (delta >= 4096)
26655 {
26656 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26657 GEN_INT (delta & ~4095)));
26658 addr = scratch;
26659 delta &= 4095;
26660 }
26661 else
26662 addr = stack_pointer_rtx;
26663
26664 addr = plus_constant (Pmode, addr, delta);
26665 }
26666 /* The store needs to be marked as frame related in order to prevent
26667 DSE from deleting it as dead if it is based on fp. */
26668 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26669 RTX_FRAME_RELATED_P (insn) = 1;
26670 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26671 }
26672 }
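
/* Worked example for the large-offset case above (made-up numbers): with
   delta == 5000 the saved LR slot is out of range of a simple offset, so
   the code emits "add scratch, sp, #4096" (delta & ~4095) and then stores
   through scratch with the remaining offset of 904 (delta & 4095).  */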
26673
26674
26675 void
26676 thumb_set_return_address (rtx source, rtx scratch)
26677 {
26678 arm_stack_offsets *offsets;
26679 HOST_WIDE_INT delta;
26680 HOST_WIDE_INT limit;
26681 int reg;
26682 rtx addr;
26683 unsigned long mask;
26684
26685 emit_use (source);
26686
26687 offsets = arm_get_frame_offsets ();
26688 mask = offsets->saved_regs_mask;
26689 if (mask & (1 << LR_REGNUM))
26690 {
26691 limit = 1024;
26692 /* Find the saved regs. */
26693 if (frame_pointer_needed)
26694 {
26695 delta = offsets->soft_frame - offsets->saved_args;
26696 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26697 if (TARGET_THUMB1)
26698 limit = 128;
26699 }
26700 else
26701 {
26702 delta = offsets->outgoing_args - offsets->saved_args;
26703 reg = SP_REGNUM;
26704 }
26705 /* Allow for the stack frame. */
26706 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26707 delta -= 16;
26708 /* The link register is always the first saved register. */
26709 delta -= 4;
26710
26711 /* Construct the address. */
26712 addr = gen_rtx_REG (SImode, reg);
26713 if (delta > limit)
26714 {
26715 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26716 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26717 addr = scratch;
26718 }
26719 else
26720 addr = plus_constant (Pmode, addr, delta);
26721
26722 /* The store needs to be marked as frame related in order to prevent
26723 DSE from deleting it as dead if it is based on fp. */
26724 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26725 RTX_FRAME_RELATED_P (insn) = 1;
26726 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26727 }
26728 else
26729 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26730 }
26731
26732 /* Implements target hook vector_mode_supported_p. */
26733 bool
26734 arm_vector_mode_supported_p (machine_mode mode)
26735 {
26736 /* Neon also supports V2SImode, etc. listed in the clause below. */
26737 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26738 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26739 || mode == V2DImode || mode == V8HFmode))
26740 return true;
26741
26742 if ((TARGET_NEON || TARGET_IWMMXT)
26743 && ((mode == V2SImode)
26744 || (mode == V4HImode)
26745 || (mode == V8QImode)))
26746 return true;
26747
26748 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26749 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26750 || mode == V2HAmode))
26751 return true;
26752
26753 return false;
26754 }
26755
26756 /* Implements target hook array_mode_supported_p. */
26757
26758 static bool
26759 arm_array_mode_supported_p (machine_mode mode,
26760 unsigned HOST_WIDE_INT nelems)
26761 {
26762 if (TARGET_NEON
26763 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26764 && (nelems >= 2 && nelems <= 4))
26765 return true;
26766
26767 return false;
26768 }
26769
26770 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26771 registers when autovectorizing for Neon, at least until multiple vector
26772 widths are supported properly by the middle-end. */
26773
26774 static machine_mode
26775 arm_preferred_simd_mode (machine_mode mode)
26776 {
26777 if (TARGET_NEON)
26778 switch (mode)
26779 {
26780 case SFmode:
26781 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26782 case SImode:
26783 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26784 case HImode:
26785 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26786 case QImode:
26787 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26788 case DImode:
26789 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26790 return V2DImode;
26791 break;
26792
26793 default:;
26794 }
26795
26796 if (TARGET_REALLY_IWMMXT)
26797 switch (mode)
26798 {
26799 case SImode:
26800 return V2SImode;
26801 case HImode:
26802 return V4HImode;
26803 case QImode:
26804 return V8QImode;
26805
26806 default:;
26807 }
26808
26809 return word_mode;
26810 }
26811
26812 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26813
26814 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26815 using r0-r3 for function arguments and r7 for the stack frame, and not have
26816 enough left over to do doubleword arithmetic. For Thumb-2 all the
26817 potentially problematic instructions accept high registers so this is not
26818 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26819 that require many low registers. */
26820 static bool
26821 arm_class_likely_spilled_p (reg_class_t rclass)
26822 {
26823 if ((TARGET_THUMB1 && rclass == LO_REGS)
26824 || rclass == CC_REG)
26825 return true;
26826
26827 return false;
26828 }
26829
26830 /* Implements target hook small_register_classes_for_mode_p. */
26831 bool
26832 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26833 {
26834 return TARGET_THUMB1;
26835 }
26836
26837 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26838 ARM insns and therefore guarantee that the shift count is modulo 256.
26839 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26840 guarantee no particular behavior for out-of-range counts. */
26841
26842 static unsigned HOST_WIDE_INT
26843 arm_shift_truncation_mask (machine_mode mode)
26844 {
26845 return mode == SImode ? 255 : 0;
26846 }
26847
26848
26849 /* Map internal gcc register numbers to DWARF2 register numbers. */
26850
26851 unsigned int
26852 arm_dbx_register_number (unsigned int regno)
26853 {
26854 if (regno < 16)
26855 return regno;
26856
26857 if (IS_VFP_REGNUM (regno))
26858 {
26859 /* See comment in arm_dwarf_register_span. */
26860 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26861 return 64 + regno - FIRST_VFP_REGNUM;
26862 else
26863 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26864 }
26865
26866 if (IS_IWMMXT_GR_REGNUM (regno))
26867 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26868
26869 if (IS_IWMMXT_REGNUM (regno))
26870 return 112 + regno - FIRST_IWMMXT_REGNUM;
26871
26872 return DWARF_FRAME_REGISTERS;
26873 }
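
/* Illustrative mapping (derived directly from the code above): core
   registers map to themselves (r0-r15 -> 0-15), the single-precision
   capable VFP registers use the legacy 64-95 range, and the upper VFP
   registers map to 256 + D-register number; for instance s0 maps to 64
   and d16 maps to 272.  iWMMXt control registers start at 104 and iWMMXt
   data registers at 112; anything else is reported as
   DWARF_FRAME_REGISTERS, i.e. has no DWARF number.  */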
26874
26875 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26876 GCC models them as 64 32-bit registers, so we need to describe this to
26877 the DWARF generation code. Other registers can use the default. */
26878 static rtx
26879 arm_dwarf_register_span (rtx rtl)
26880 {
26881 machine_mode mode;
26882 unsigned regno;
26883 rtx parts[16];
26884 int nregs;
26885 int i;
26886
26887 regno = REGNO (rtl);
26888 if (!IS_VFP_REGNUM (regno))
26889 return NULL_RTX;
26890
26891 /* XXX FIXME: The EABI defines two VFP register ranges:
26892 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26893 256-287: D0-D31
26894 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26895 corresponding D register. Until GDB supports this, we shall use the
26896 legacy encodings. We also use these encodings for D0-D15 for
26897 compatibility with older debuggers. */
26898 mode = GET_MODE (rtl);
26899 if (GET_MODE_SIZE (mode) < 8)
26900 return NULL_RTX;
26901
26902 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26903 {
26904 nregs = GET_MODE_SIZE (mode) / 4;
26905 for (i = 0; i < nregs; i += 2)
26906 if (TARGET_BIG_END)
26907 {
26908 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26909 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26910 }
26911 else
26912 {
26913 parts[i] = gen_rtx_REG (SImode, regno + i);
26914 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26915 }
26916 }
26917 else
26918 {
26919 nregs = GET_MODE_SIZE (mode) / 8;
26920 for (i = 0; i < nregs; i++)
26921 parts[i] = gen_rtx_REG (DImode, regno + i);
26922 }
26923
26924 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26925 }
26926
26927 #if ARM_UNWIND_INFO
26928 /* Emit unwind directives for a store-multiple instruction or stack pointer
26929 push during alignment.
26930 These should only ever be generated by the function prologue code, so
26931 expect them to have a particular form.
26932 The store-multiple instruction sometimes pushes pc as the last register,
26933 although it should not be tracked into unwind information, or for -Os
26934 sometimes pushes some dummy registers before the first register that needs
26935 to be tracked in unwind information; such dummy registers are there just
26936 to avoid a separate stack adjustment, and will not be restored in the
26937 epilogue. */
26938
26939 static void
26940 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26941 {
26942 int i;
26943 HOST_WIDE_INT offset;
26944 HOST_WIDE_INT nregs;
26945 int reg_size;
26946 unsigned reg;
26947 unsigned lastreg;
26948 unsigned padfirst = 0, padlast = 0;
26949 rtx e;
26950
26951 e = XVECEXP (p, 0, 0);
26952 gcc_assert (GET_CODE (e) == SET);
26953
26954 /* First insn will adjust the stack pointer. */
26955 gcc_assert (GET_CODE (e) == SET
26956 && REG_P (SET_DEST (e))
26957 && REGNO (SET_DEST (e)) == SP_REGNUM
26958 && GET_CODE (SET_SRC (e)) == PLUS);
26959
26960 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26961 nregs = XVECLEN (p, 0) - 1;
26962 gcc_assert (nregs);
26963
26964 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26965 if (reg < 16)
26966 {
26967 /* For -Os dummy registers can be pushed at the beginning to
26968 avoid separate stack pointer adjustment. */
26969 e = XVECEXP (p, 0, 1);
26970 e = XEXP (SET_DEST (e), 0);
26971 if (GET_CODE (e) == PLUS)
26972 padfirst = INTVAL (XEXP (e, 1));
26973 gcc_assert (padfirst == 0 || optimize_size);
26974 /* The function prologue may also push pc, but not annotate it as it is
26975 never restored. We turn this into a stack pointer adjustment. */
26976 e = XVECEXP (p, 0, nregs);
26977 e = XEXP (SET_DEST (e), 0);
26978 if (GET_CODE (e) == PLUS)
26979 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26980 else
26981 padlast = offset - 4;
26982 gcc_assert (padlast == 0 || padlast == 4);
26983 if (padlast == 4)
26984 fprintf (asm_out_file, "\t.pad #4\n");
26985 reg_size = 4;
26986 fprintf (asm_out_file, "\t.save {");
26987 }
26988 else if (IS_VFP_REGNUM (reg))
26989 {
26990 reg_size = 8;
26991 fprintf (asm_out_file, "\t.vsave {");
26992 }
26993 else
26994 /* Unknown register type. */
26995 gcc_unreachable ();
26996
26997 /* If the stack increment doesn't match the size of the saved registers,
26998 something has gone horribly wrong. */
26999 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27000
27001 offset = padfirst;
27002 lastreg = 0;
27003 /* The remaining insns will describe the stores. */
27004 for (i = 1; i <= nregs; i++)
27005 {
27006 /* Expect (set (mem <addr>) (reg)).
27007 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27008 e = XVECEXP (p, 0, i);
27009 gcc_assert (GET_CODE (e) == SET
27010 && MEM_P (SET_DEST (e))
27011 && REG_P (SET_SRC (e)));
27012
27013 reg = REGNO (SET_SRC (e));
27014 gcc_assert (reg >= lastreg);
27015
27016 if (i != 1)
27017 fprintf (asm_out_file, ", ");
27018 /* We can't use %r for vfp because we need to use the
27019 double precision register names. */
27020 if (IS_VFP_REGNUM (reg))
27021 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27022 else
27023 asm_fprintf (asm_out_file, "%r", reg);
27024
27025 if (flag_checking)
27026 {
27027 /* Check that the addresses are consecutive. */
27028 e = XEXP (SET_DEST (e), 0);
27029 if (GET_CODE (e) == PLUS)
27030 gcc_assert (REG_P (XEXP (e, 0))
27031 && REGNO (XEXP (e, 0)) == SP_REGNUM
27032 && CONST_INT_P (XEXP (e, 1))
27033 && offset == INTVAL (XEXP (e, 1)));
27034 else
27035 gcc_assert (i == 1
27036 && REG_P (e)
27037 && REGNO (e) == SP_REGNUM);
27038 offset += reg_size;
27039 }
27040 }
27041 fprintf (asm_out_file, "}\n");
27042 if (padfirst)
27043 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27044 }
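
/* As an illustration of what this emits (the exact register names depend
   on the configured REGISTER_NAMES):

       push   {r4, r5, lr}    ->   .save {r4, r5, lr}
       vpush  {d8, d9}        ->   .vsave {d8, d9}

   A push that also stored pc purely as padding gets an extra ".pad #4"
   before the ".save", and dummy registers pushed first for -Os are
   reported with a trailing ".pad #<padfirst>" after it.  */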
27045
27046 /* Emit unwind directives for a SET. */
27047
27048 static void
27049 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27050 {
27051 rtx e0;
27052 rtx e1;
27053 unsigned reg;
27054
27055 e0 = XEXP (p, 0);
27056 e1 = XEXP (p, 1);
27057 switch (GET_CODE (e0))
27058 {
27059 case MEM:
27060 /* Pushing a single register. */
27061 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27062 || !REG_P (XEXP (XEXP (e0, 0), 0))
27063 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27064 abort ();
27065
27066 asm_fprintf (asm_out_file, "\t.save ");
27067 if (IS_VFP_REGNUM (REGNO (e1)))
27068 asm_fprintf(asm_out_file, "{d%d}\n",
27069 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27070 else
27071 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27072 break;
27073
27074 case REG:
27075 if (REGNO (e0) == SP_REGNUM)
27076 {
27077 /* A stack increment. */
27078 if (GET_CODE (e1) != PLUS
27079 || !REG_P (XEXP (e1, 0))
27080 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27081 || !CONST_INT_P (XEXP (e1, 1)))
27082 abort ();
27083
27084 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27085 -INTVAL (XEXP (e1, 1)));
27086 }
27087 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27088 {
27089 HOST_WIDE_INT offset;
27090
27091 if (GET_CODE (e1) == PLUS)
27092 {
27093 if (!REG_P (XEXP (e1, 0))
27094 || !CONST_INT_P (XEXP (e1, 1)))
27095 abort ();
27096 reg = REGNO (XEXP (e1, 0));
27097 offset = INTVAL (XEXP (e1, 1));
27098 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27099 HARD_FRAME_POINTER_REGNUM, reg,
27100 offset);
27101 }
27102 else if (REG_P (e1))
27103 {
27104 reg = REGNO (e1);
27105 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27106 HARD_FRAME_POINTER_REGNUM, reg);
27107 }
27108 else
27109 abort ();
27110 }
27111 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27112 {
27113 /* Move from sp to reg. */
27114 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27115 }
27116 else if (GET_CODE (e1) == PLUS
27117 && REG_P (XEXP (e1, 0))
27118 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27119 && CONST_INT_P (XEXP (e1, 1)))
27120 {
27121 /* Set reg to offset from sp. */
27122 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27123 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27124 }
27125 else
27126 abort ();
27127 break;
27128
27129 default:
27130 abort ();
27131 }
27132 }
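
/* Some representative SETs and the directives they turn into (register
   names as printed by %r; "fp" here stands for the hard frame pointer,
   r11 in ARM state):

       (set (mem (pre_dec sp)) (reg r4))       ->  .save {r4}
       (set sp (plus sp (const_int -16)))      ->  .pad #16
       (set fp (plus sp (const_int 8)))        ->  .setfp fp, sp, #8
       (set (reg r4) sp)                       ->  .movsp r4
       (set (reg r4) (plus sp (const_int 8)))  ->  .movsp r4, #8

   Anything else reaching this function aborts.  */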
27133
27134
27135 /* Emit unwind directives for the given insn. */
27136
27137 static void
27138 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27139 {
27140 rtx note, pat;
27141 bool handled_one = false;
27142
27143 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27144 return;
27145
27146 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27147 && (TREE_NOTHROW (current_function_decl)
27148 || crtl->all_throwers_are_sibcalls))
27149 return;
27150
27151 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27152 return;
27153
27154 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27155 {
27156 switch (REG_NOTE_KIND (note))
27157 {
27158 case REG_FRAME_RELATED_EXPR:
27159 pat = XEXP (note, 0);
27160 goto found;
27161
27162 case REG_CFA_REGISTER:
27163 pat = XEXP (note, 0);
27164 if (pat == NULL)
27165 {
27166 pat = PATTERN (insn);
27167 if (GET_CODE (pat) == PARALLEL)
27168 pat = XVECEXP (pat, 0, 0);
27169 }
27170
27171 /* Only emitted for IS_STACKALIGN re-alignment. */
27172 {
27173 rtx dest, src;
27174 unsigned reg;
27175
27176 src = SET_SRC (pat);
27177 dest = SET_DEST (pat);
27178
27179 gcc_assert (src == stack_pointer_rtx);
27180 reg = REGNO (dest);
27181 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27182 reg + 0x90, reg);
27183 }
27184 handled_one = true;
27185 break;
27186
27187 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27188 only to get correct dwarf information for shrink-wrapping.  We should not
27189 emit unwind information for it because such notes are used either for
27190 pretend arguments or to adjust sp and restore registers from the
27191 stack. */
27192 case REG_CFA_DEF_CFA:
27193 case REG_CFA_ADJUST_CFA:
27194 case REG_CFA_RESTORE:
27195 return;
27196
27197 case REG_CFA_EXPRESSION:
27198 case REG_CFA_OFFSET:
27199 /* ??? Only handling here what we actually emit. */
27200 gcc_unreachable ();
27201
27202 default:
27203 break;
27204 }
27205 }
27206 if (handled_one)
27207 return;
27208 pat = PATTERN (insn);
27209 found:
27210
27211 switch (GET_CODE (pat))
27212 {
27213 case SET:
27214 arm_unwind_emit_set (asm_out_file, pat);
27215 break;
27216
27217 case SEQUENCE:
27218 /* Store multiple. */
27219 arm_unwind_emit_sequence (asm_out_file, pat);
27220 break;
27221
27222 default:
27223 abort();
27224 }
27225 }
27226
27227
27228 /* Output a reference from a function exception table to the type_info
27229 object X. The EABI specifies that the symbol should be relocated by
27230 an R_ARM_TARGET2 relocation. */
27231
27232 static bool
27233 arm_output_ttype (rtx x)
27234 {
27235 fputs ("\t.word\t", asm_out_file);
27236 output_addr_const (asm_out_file, x);
27237 /* Use special relocations for symbol references. */
27238 if (!CONST_INT_P (x))
27239 fputs ("(TARGET2)", asm_out_file);
27240 fputc ('\n', asm_out_file);
27241
27242 return TRUE;
27243 }
27244
27245 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27246
27247 static void
27248 arm_asm_emit_except_personality (rtx personality)
27249 {
27250 fputs ("\t.personality\t", asm_out_file);
27251 output_addr_const (asm_out_file, personality);
27252 fputc ('\n', asm_out_file);
27253 }
27254 #endif /* ARM_UNWIND_INFO */
27255
27256 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27257
27258 static void
27259 arm_asm_init_sections (void)
27260 {
27261 #if ARM_UNWIND_INFO
27262 exception_section = get_unnamed_section (0, output_section_asm_op,
27263 "\t.handlerdata");
27264 #endif /* ARM_UNWIND_INFO */
27265
27266 #ifdef OBJECT_FORMAT_ELF
27267 if (target_pure_code)
27268 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27269 #endif
27270 }
27271
27272 /* Output unwind directives for the start/end of a function. */
27273
27274 void
27275 arm_output_fn_unwind (FILE * f, bool prologue)
27276 {
27277 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27278 return;
27279
27280 if (prologue)
27281 fputs ("\t.fnstart\n", f);
27282 else
27283 {
27284 /* If this function will never be unwound, then mark it as such.
27285 The same condition is used in arm_unwind_emit to suppress
27286 the frame annotations. */
27287 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27288 && (TREE_NOTHROW (current_function_decl)
27289 || crtl->all_throwers_are_sibcalls))
27290 fputs("\t.cantunwind\n", f);
27291
27292 fputs ("\t.fnend\n", f);
27293 }
27294 }
27295
27296 static bool
27297 arm_emit_tls_decoration (FILE *fp, rtx x)
27298 {
27299 enum tls_reloc reloc;
27300 rtx val;
27301
27302 val = XVECEXP (x, 0, 0);
27303 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27304
27305 output_addr_const (fp, val);
27306
27307 switch (reloc)
27308 {
27309 case TLS_GD32:
27310 fputs ("(tlsgd)", fp);
27311 break;
27312 case TLS_LDM32:
27313 fputs ("(tlsldm)", fp);
27314 break;
27315 case TLS_LDO32:
27316 fputs ("(tlsldo)", fp);
27317 break;
27318 case TLS_IE32:
27319 fputs ("(gottpoff)", fp);
27320 break;
27321 case TLS_LE32:
27322 fputs ("(tpoff)", fp);
27323 break;
27324 case TLS_DESCSEQ:
27325 fputs ("(tlsdesc)", fp);
27326 break;
27327 default:
27328 gcc_unreachable ();
27329 }
27330
27331 switch (reloc)
27332 {
27333 case TLS_GD32:
27334 case TLS_LDM32:
27335 case TLS_IE32:
27336 case TLS_DESCSEQ:
27337 fputs (" + (. - ", fp);
27338 output_addr_const (fp, XVECEXP (x, 0, 2));
27339 /* For TLS_DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27340 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27341 output_addr_const (fp, XVECEXP (x, 0, 3));
27342 fputc (')', fp);
27343 break;
27344 default:
27345 break;
27346 }
27347
27348 return TRUE;
27349 }
27350
27351 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27352
27353 static void
27354 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27355 {
27356 gcc_assert (size == 4);
27357 fputs ("\t.word\t", file);
27358 output_addr_const (file, x);
27359 fputs ("(tlsldo)", file);
27360 }
27361
27362 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27363
27364 static bool
27365 arm_output_addr_const_extra (FILE *fp, rtx x)
27366 {
27367 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27368 return arm_emit_tls_decoration (fp, x);
27369 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27370 {
27371 char label[256];
27372 int labelno = INTVAL (XVECEXP (x, 0, 0));
27373
27374 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27375 assemble_name_raw (fp, label);
27376
27377 return TRUE;
27378 }
27379 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27380 {
27381 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27382 if (GOT_PCREL)
27383 fputs ("+.", fp);
27384 fputs ("-(", fp);
27385 output_addr_const (fp, XVECEXP (x, 0, 0));
27386 fputc (')', fp);
27387 return TRUE;
27388 }
27389 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27390 {
27391 output_addr_const (fp, XVECEXP (x, 0, 0));
27392 if (GOT_PCREL)
27393 fputs ("+.", fp);
27394 fputs ("-(", fp);
27395 output_addr_const (fp, XVECEXP (x, 0, 1));
27396 fputc (')', fp);
27397 return TRUE;
27398 }
27399 else if (GET_CODE (x) == CONST_VECTOR)
27400 return arm_emit_vector_const (fp, x);
27401
27402 return FALSE;
27403 }
27404
27405 /* Output assembly for a shift instruction.
27406 SET_FLAGS determines how the instruction modifies the condition codes.
27407 0 - Do not set condition codes.
27408 1 - Set condition codes.
27409 2 - Use smallest instruction. */
27410 const char *
27411 arm_output_shift(rtx * operands, int set_flags)
27412 {
27413 char pattern[100];
27414 static const char flag_chars[3] = {'?', '.', '!'};
27415 const char *shift;
27416 HOST_WIDE_INT val;
27417 char c;
27418
27419 c = flag_chars[set_flags];
27420 shift = shift_op(operands[3], &val);
27421 if (shift)
27422 {
27423 if (val != -1)
27424 operands[2] = GEN_INT(val);
27425 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27426 }
27427 else
27428 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27429
27430 output_asm_insn (pattern, operands);
27431 return "";
27432 }
27433
27434 /* Output assembly for a WMMX immediate shift instruction. */
27435 const char *
27436 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27437 {
27438 int shift = INTVAL (operands[2]);
27439 char templ[50];
27440 machine_mode opmode = GET_MODE (operands[0]);
27441
27442 gcc_assert (shift >= 0);
27443
27444 /* If the shift value in the register versions is > 63 (for the D qualifier),
27445 31 (W) or 15 (H), emit shifts by 32 for WROR/WSRA and wzero otherwise. */
27446 if (((opmode == V4HImode) && (shift > 15))
27447 || ((opmode == V2SImode) && (shift > 31))
27448 || ((opmode == DImode) && (shift > 63)))
27449 {
27450 if (wror_or_wsra)
27451 {
27452 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27453 output_asm_insn (templ, operands);
27454 if (opmode == DImode)
27455 {
27456 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27457 output_asm_insn (templ, operands);
27458 }
27459 }
27460 else
27461 {
27462 /* The destination register will contain all zeros. */
27463 sprintf (templ, "wzero\t%%0");
27464 output_asm_insn (templ, operands);
27465 }
27466 return "";
27467 }
27468
27469 if ((opmode == DImode) && (shift > 32))
27470 {
27471 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27472 output_asm_insn (templ, operands);
27473 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27474 output_asm_insn (templ, operands);
27475 }
27476 else
27477 {
27478 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27479 output_asm_insn (templ, operands);
27480 }
27481 return "";
27482 }
27483
27484 /* Output assembly for a WMMX tinsr instruction. */
27485 const char *
27486 arm_output_iwmmxt_tinsr (rtx *operands)
27487 {
27488 int mask = INTVAL (operands[3]);
27489 int i;
27490 char templ[50];
27491 int units = mode_nunits[GET_MODE (operands[0])];
27492 gcc_assert ((mask & (mask - 1)) == 0);
27493 for (i = 0; i < units; ++i)
27494 {
27495 if ((mask & 0x01) == 1)
27496 {
27497 break;
27498 }
27499 mask >>= 1;
27500 }
27501 gcc_assert (i < units);
27502 {
27503 switch (GET_MODE (operands[0]))
27504 {
27505 case V8QImode:
27506 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27507 break;
27508 case V4HImode:
27509 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27510 break;
27511 case V2SImode:
27512 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27513 break;
27514 default:
27515 gcc_unreachable ();
27516 break;
27517 }
27518 output_asm_insn (templ, operands);
27519 }
27520 return "";
27521 }
27522
27523 /* Output a Thumb-1 casesi dispatch sequence. */
27524 const char *
27525 thumb1_output_casesi (rtx *operands)
27526 {
27527 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27528
27529 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27530
27531 switch (GET_MODE(diff_vec))
27532 {
27533 case QImode:
27534 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27535 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27536 case HImode:
27537 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27538 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27539 case SImode:
27540 return "bl\t%___gnu_thumb1_case_si";
27541 default:
27542 gcc_unreachable ();
27543 }
27544 }
27545
27546 /* Output a Thumb-2 casesi instruction. */
27547 const char *
27548 thumb2_output_casesi (rtx *operands)
27549 {
27550 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27551
27552 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27553
27554 output_asm_insn ("cmp\t%0, %1", operands);
27555 output_asm_insn ("bhi\t%l3", operands);
27556 switch (GET_MODE(diff_vec))
27557 {
27558 case QImode:
27559 return "tbb\t[%|pc, %0]";
27560 case HImode:
27561 return "tbh\t[%|pc, %0, lsl #1]";
27562 case SImode:
27563 if (flag_pic)
27564 {
27565 output_asm_insn ("adr\t%4, %l2", operands);
27566 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27567 output_asm_insn ("add\t%4, %4, %5", operands);
27568 return "bx\t%4";
27569 }
27570 else
27571 {
27572 output_asm_insn ("adr\t%4, %l2", operands);
27573 return "ldr\t%|pc, [%4, %0, lsl #2]";
27574 }
27575 default:
27576 gcc_unreachable ();
27577 }
27578 }
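
/* A typical Thumb-2 dispatch sequence produced here for an HImode table,
   with illustrative operands (index in r0, bound in r1, default label
   .Ldefault), is:

       cmp   r0, r1
       bhi   .Ldefault
       tbh   [pc, r0, lsl #1]

   and for an SImode table without -fpic:

       cmp   r0, r1
       bhi   .Ldefault
       adr   r4, .Ltable
       ldr   pc, [r4, r0, lsl #2]
   */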
27579
27580 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27581 per-core tuning structs. */
27582 static int
27583 arm_issue_rate (void)
27584 {
27585 return current_tune->issue_rate;
27586 }
27587
27588 /* Return how many instructions ahead the scheduler should look to choose
27589 the best one. */
27590 static int
27591 arm_first_cycle_multipass_dfa_lookahead (void)
27592 {
27593 int issue_rate = arm_issue_rate ();
27594
27595 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27596 }
27597
27598 /* Enable modeling of L2 auto-prefetcher. */
27599 static int
27600 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27601 {
27602 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27603 }
27604
27605 const char *
27606 arm_mangle_type (const_tree type)
27607 {
27608 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27609 has to be mangled as if it is in the "std" namespace. */
27610 if (TARGET_AAPCS_BASED
27611 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27612 return "St9__va_list";
27613
27614 /* Half-precision float. */
27615 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27616 return "Dh";
27617
27618 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27619 builtin type. */
27620 if (TYPE_NAME (type) != NULL)
27621 return arm_mangle_builtin_type (type);
27622
27623 /* Use the default mangling. */
27624 return NULL;
27625 }
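
/* For example, under the AAPCS this mangles the builtin va_list type as
   "St9__va_list" (as if it were std::__va_list) and the half-precision
   __fp16 type as "Dh"; Neon builtin vector types are handed off to
   arm_mangle_builtin_type, and everything else falls back to the default
   mangling.  */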
27626
27627 /* Order of allocation of core registers for Thumb: this allocation is
27628 written over the corresponding initial entries of the array
27629 initialized with REG_ALLOC_ORDER. We allocate all low registers
27630 first. Saving and restoring a low register is usually cheaper than
27631 using a call-clobbered high register. */
27632
27633 static const int thumb_core_reg_alloc_order[] =
27634 {
27635 3, 2, 1, 0, 4, 5, 6, 7,
27636 14, 12, 8, 9, 10, 11
27637 };
27638
27639 /* Adjust register allocation order when compiling for Thumb. */
27640
27641 void
27642 arm_order_regs_for_local_alloc (void)
27643 {
27644 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27645 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27646 if (TARGET_THUMB)
27647 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27648 sizeof (thumb_core_reg_alloc_order));
27649 }
27650
27651 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27652
27653 bool
27654 arm_frame_pointer_required (void)
27655 {
27656 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27657 return true;
27658
27659 /* If the function receives nonlocal gotos, it needs to save the frame
27660 pointer in the nonlocal_goto_save_area object. */
27661 if (cfun->has_nonlocal_label)
27662 return true;
27663
27664 /* The frame pointer is required for non-leaf APCS frames. */
27665 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27666 return true;
27667
27668 /* If we are probing the stack in the prologue, we will have a faulting
27669 instruction prior to the stack adjustment and this requires a frame
27670 pointer if we want to catch the exception using the EABI unwinder. */
27671 if (!IS_INTERRUPT (arm_current_func_type ())
27672 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27673 && arm_except_unwind_info (&global_options) == UI_TARGET
27674 && cfun->can_throw_non_call_exceptions)
27675 {
27676 HOST_WIDE_INT size = get_frame_size ();
27677
27678 /* That's irrelevant if there is no stack adjustment. */
27679 if (size <= 0)
27680 return false;
27681
27682 /* That's relevant only if there is a stack probe. */
27683 if (crtl->is_leaf && !cfun->calls_alloca)
27684 {
27685 /* We don't have the final size of the frame so adjust. */
27686 size += 32 * UNITS_PER_WORD;
27687 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27688 return true;
27689 }
27690 else
27691 return true;
27692 }
27693
27694 return false;
27695 }
27696
27697 /* Thumb-1 is the only configuration that lacks conditional execution,
27698 so return true if the target is not Thumb-1. */
27699 static bool
27700 arm_have_conditional_execution (void)
27701 {
27702 return !TARGET_THUMB1;
27703 }
27704
27705 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27706 static HOST_WIDE_INT
27707 arm_vector_alignment (const_tree type)
27708 {
27709 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27710
27711 if (TARGET_AAPCS_BASED)
27712 align = MIN (align, 64);
27713
27714 return align;
27715 }
27716
27717 static unsigned int
27718 arm_autovectorize_vector_sizes (void)
27719 {
27720 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27721 }
27722
27723 static bool
27724 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27725 {
27726 /* Vectors which aren't in packed structures will not be less aligned than
27727 the natural alignment of their element type, so this is safe. */
27728 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27729 return !is_packed;
27730
27731 return default_builtin_vector_alignment_reachable (type, is_packed);
27732 }
27733
27734 static bool
27735 arm_builtin_support_vector_misalignment (machine_mode mode,
27736 const_tree type, int misalignment,
27737 bool is_packed)
27738 {
27739 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27740 {
27741 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27742
27743 if (is_packed)
27744 return align == 1;
27745
27746 /* If the misalignment is unknown, we should be able to handle the access
27747 so long as it is not to a member of a packed data structure. */
27748 if (misalignment == -1)
27749 return true;
27750
27751 /* Return true if the misalignment is a multiple of the natural alignment
27752 of the vector's element type. This is probably always going to be
27753 true in practice, since we've already established that this isn't a
27754 packed access. */
27755 return ((misalignment % align) == 0);
27756 }
27757
27758 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27759 is_packed);
27760 }
27761
27762 static void
27763 arm_conditional_register_usage (void)
27764 {
27765 int regno;
27766
27767 if (TARGET_THUMB1 && optimize_size)
27768 {
27769 /* When optimizing for size on Thumb-1, it's better not
27770 to use the HI regs, because of the overhead of
27771 stacking them. */
27772 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27773 fixed_regs[regno] = call_used_regs[regno] = 1;
27774 }
27775
27776 /* The link register can be clobbered by any branch insn,
27777 but we have no way to track that at present, so mark
27778 it as unavailable. */
27779 if (TARGET_THUMB1)
27780 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27781
27782 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27783 {
27784 /* VFPv3 registers are disabled when earlier VFP
27785 versions are selected due to the definition of
27786 LAST_VFP_REGNUM. */
27787 for (regno = FIRST_VFP_REGNUM;
27788 regno <= LAST_VFP_REGNUM; ++ regno)
27789 {
27790 fixed_regs[regno] = 0;
27791 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27792 || regno >= FIRST_VFP_REGNUM + 32;
27793 }
27794 }
27795
27796 if (TARGET_REALLY_IWMMXT)
27797 {
27798 regno = FIRST_IWMMXT_GR_REGNUM;
27799 /* The 2002/10/09 revision of the XScale ABI has wCG0
27800 and wCG1 as call-preserved registers. The 2002/11/21
27801 revision changed this so that all wCG registers are
27802 scratch registers. */
27803 for (regno = FIRST_IWMMXT_GR_REGNUM;
27804 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27805 fixed_regs[regno] = 0;
27806 /* The XScale ABI has wR0 - wR9 as scratch registers,
27807 the rest as call-preserved registers. */
27808 for (regno = FIRST_IWMMXT_REGNUM;
27809 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27810 {
27811 fixed_regs[regno] = 0;
27812 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27813 }
27814 }
27815
27816 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27817 {
27818 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27819 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27820 }
27821 else if (TARGET_APCS_STACK)
27822 {
27823 fixed_regs[10] = 1;
27824 call_used_regs[10] = 1;
27825 }
27826 /* -mcaller-super-interworking reserves r11 for calls to
27827 _interwork_r11_call_via_rN(). Making the register global
27828 is an easy way of ensuring that it remains valid for all
27829 calls. */
27830 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27831 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27832 {
27833 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27834 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27835 if (TARGET_CALLER_INTERWORKING)
27836 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27837 }
27838 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27839 }
27840
27841 static reg_class_t
27842 arm_preferred_rename_class (reg_class_t rclass)
27843 {
27844 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27845 using GENERAL_REGS.  Preferring LO_REGS during the register rename pass
27846 can therefore reduce code size. */
27847 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27848 return LO_REGS;
27849 else
27850 return NO_REGS;
27851 }
27852
27853 /* Compute the attribute "length" of insn "*push_multi".
27854 So this function MUST be kept in sync with that insn pattern. */
27855 int
27856 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27857 {
27858 int i, regno, hi_reg;
27859 int num_saves = XVECLEN (parallel_op, 0);
27860
27861 /* ARM mode. */
27862 if (TARGET_ARM)
27863 return 4;
27864 /* Thumb1 mode. */
27865 if (TARGET_THUMB1)
27866 return 2;
27867
27868 /* Thumb2 mode. */
27869 regno = REGNO (first_op);
27870 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the
27871 register list fits in 8 bits.  Normally this means all registers in the
27872 list must be LO_REGS, that is (R0-R7).  If any HI_REGS register is used,
27873 then we must use 32-bit encodings.  The one exception is PUSH, where LR
27874 (although in HI_REGS) can still be used with the 16-bit encoding. */
27875 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27876 for (i = 1; i < num_saves && !hi_reg; i++)
27877 {
27878 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27879 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27880 }
27881
27882 if (!hi_reg)
27883 return 2;
27884 return 4;
27885 }
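
/* Worked examples for Thumb-2 (ARM is always 4 bytes, Thumb-1 always 2):

       push {r0-r7}      -> 2   (all LO_REGS)
       push {r4, lr}     -> 2   (LR is the one HI_REGS exception for PUSH)
       push {r4, r8}     -> 4   (r8 forces the 32-bit encoding)
   */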
27886
27887 /* Compute the attribute "length" of an insn.  Currently, this function is
27888 used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27889 "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
27890 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
27891 is true if OPERANDS contains an insn that explicitly updates the base register. */
27892
27893 int
27894 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27895 {
27896 /* ARM mode. */
27897 if (TARGET_ARM)
27898 return 4;
27899 /* Thumb1 mode. */
27900 if (TARGET_THUMB1)
27901 return 2;
27902
27903 rtx parallel_op = operands[0];
27904 /* Index of the last element of the PARALLEL. */
27905 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27906 /* Start from the register number of the base register. */
27907 unsigned regno = REGNO (operands[1]);
27908 /* Skip the return and write-back patterns; we only need the register
27909 pop patterns for the analysis below. */
27910 unsigned first_indx = 0;
27911 first_indx += return_pc ? 1 : 0;
27912 first_indx += write_back_p ? 1 : 0;
27913
27914 /* A pop operation can be done through LDM or POP.  If the base register is
27915 SP and it uses write-back, then the LDM is an alias of POP. */
27916 bool pop_p = (regno == SP_REGNUM && write_back_p);
27917 bool ldm_p = !pop_p;
27918
27919 /* Check base register for LDM. */
27920 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27921 return 4;
27922
27923 /* Check each register in the list. */
27924 for (; indx >= first_indx; indx--)
27925 {
27926 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27927 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27928 comment in arm_attr_length_push_multi. */
27929 if (REGNO_REG_CLASS (regno) == HI_REGS
27930 && (regno != PC_REGNUM || ldm_p))
27931 return 4;
27932 }
27933
27934 return 2;
27935 }
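
/* Worked examples for Thumb-2 (again, ARM is always 4 and Thumb-1 is 2):

       pop {r4, pc}        -> 2   (SP base with write-back; PC is allowed
                                   in the 16-bit POP encoding)
       pop {r0-r3}         -> 2   (all LO_REGS)
       ldm r8!, {r0, r1}   -> 4   (base register in HI_REGS)
   */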
27936
27937 /* Compute the number of instructions emitted by output_move_double. */
27938 int
27939 arm_count_output_move_double_insns (rtx *operands)
27940 {
27941 int count;
27942 rtx ops[2];
27943 /* output_move_double may modify the operands array, so call it
27944 here on a copy of the array. */
27945 ops[0] = operands[0];
27946 ops[1] = operands[1];
27947 output_move_double (ops, false, &count);
27948 return count;
27949 }
27950
27951 int
27952 vfp3_const_double_for_fract_bits (rtx operand)
27953 {
27954 REAL_VALUE_TYPE r0;
27955
27956 if (!CONST_DOUBLE_P (operand))
27957 return 0;
27958
27959 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27960 if (exact_real_inverse (DFmode, &r0)
27961 && !REAL_VALUE_NEGATIVE (r0))
27962 {
27963 if (exact_real_truncate (DFmode, &r0))
27964 {
27965 HOST_WIDE_INT value = real_to_integer (&r0);
27966 value = value & 0xffffffff;
27967 if ((value != 0) && ( (value & (value - 1)) == 0))
27968 {
27969 int ret = exact_log2 (value);
27970 gcc_assert (IN_RANGE (ret, 0, 31));
27971 return ret;
27972 }
27973 }
27974 }
27975 return 0;
27976 }
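
/* For example, a CONST_DOUBLE of 0.25 has the exact inverse 4.0, which is
   2**2, so this returns 2 (i.e. 2 fractional bits); 0.125 gives 3.  A
   value such as 0.3, whose inverse is not exactly representable, or any
   negative constant, makes this return 0, meaning the constant is not
   usable for a fixed-point conversion.  */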
27977
27978 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27979 log2 is in [1, 32], return that log2. Otherwise return -1.
27980 This is used in the patterns for vcvt.s32.f32 floating-point to
27981 fixed-point conversions. */
27982
27983 int
27984 vfp3_const_double_for_bits (rtx x)
27985 {
27986 const REAL_VALUE_TYPE *r;
27987
27988 if (!CONST_DOUBLE_P (x))
27989 return -1;
27990
27991 r = CONST_DOUBLE_REAL_VALUE (x);
27992
27993 if (REAL_VALUE_NEGATIVE (*r)
27994 || REAL_VALUE_ISNAN (*r)
27995 || REAL_VALUE_ISINF (*r)
27996 || !real_isinteger (r, SFmode))
27997 return -1;
27998
27999 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28000
28001 /* The exact_log2 above will have returned -1 if this is
28002 not an exact log2. */
28003 if (!IN_RANGE (hwint, 1, 32))
28004 return -1;
28005
28006 return hwint;
28007 }
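
/* For example, a CONST_DOUBLE of 8.0 yields 3 and 4294967296.0 (2**32)
   yields 32, while 1.0 (whose log2, 0, is out of range), 6.0 (not a power
   of two), 0.5 (not an integer) and any negative, NaN or infinite value
   all yield -1.  */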
28008
28009 \f
28010 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28011
28012 static void
28013 arm_pre_atomic_barrier (enum memmodel model)
28014 {
28015 if (need_atomic_barrier_p (model, true))
28016 emit_insn (gen_memory_barrier ());
28017 }
28018
28019 static void
28020 arm_post_atomic_barrier (enum memmodel model)
28021 {
28022 if (need_atomic_barrier_p (model, false))
28023 emit_insn (gen_memory_barrier ());
28024 }
28025
28026 /* Emit the load-exclusive and store-exclusive instructions.
28027 Use acquire and release versions if necessary. */
28028
28029 static void
28030 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28031 {
28032 rtx (*gen) (rtx, rtx);
28033
28034 if (acq)
28035 {
28036 switch (mode)
28037 {
28038 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28039 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28040 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28041 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28042 default:
28043 gcc_unreachable ();
28044 }
28045 }
28046 else
28047 {
28048 switch (mode)
28049 {
28050 case QImode: gen = gen_arm_load_exclusiveqi; break;
28051 case HImode: gen = gen_arm_load_exclusivehi; break;
28052 case SImode: gen = gen_arm_load_exclusivesi; break;
28053 case DImode: gen = gen_arm_load_exclusivedi; break;
28054 default:
28055 gcc_unreachable ();
28056 }
28057 }
28058
28059 emit_insn (gen (rval, mem));
28060 }
28061
28062 static void
28063 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28064 rtx mem, bool rel)
28065 {
28066 rtx (*gen) (rtx, rtx, rtx);
28067
28068 if (rel)
28069 {
28070 switch (mode)
28071 {
28072 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28073 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28074 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28075 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28076 default:
28077 gcc_unreachable ();
28078 }
28079 }
28080 else
28081 {
28082 switch (mode)
28083 {
28084 case QImode: gen = gen_arm_store_exclusiveqi; break;
28085 case HImode: gen = gen_arm_store_exclusivehi; break;
28086 case SImode: gen = gen_arm_store_exclusivesi; break;
28087 case DImode: gen = gen_arm_store_exclusivedi; break;
28088 default:
28089 gcc_unreachable ();
28090 }
28091 }
28092
28093 emit_insn (gen (bval, rval, mem));
28094 }
28095
28096 /* Emit INSN as a jump and mark it as unlikely to be taken. */
28097
28098 static void
28099 emit_unlikely_jump (rtx insn)
28100 {
28101 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28102
28103 rtx_insn *jump = emit_jump_insn (insn);
28104 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28105 }
28106
28107 /* Expand a compare and swap pattern. */
28108
28109 void
28110 arm_expand_compare_and_swap (rtx operands[])
28111 {
28112 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28113 machine_mode mode;
28114 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28115
28116 bval = operands[0];
28117 rval = operands[1];
28118 mem = operands[2];
28119 oldval = operands[3];
28120 newval = operands[4];
28121 is_weak = operands[5];
28122 mod_s = operands[6];
28123 mod_f = operands[7];
28124 mode = GET_MODE (mem);
28125
28126 /* Normally the succ memory model must be stronger than fail, but in the
28127 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28128 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28129
28130 if (TARGET_HAVE_LDACQ
28131 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28132 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28133 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28134
28135 switch (mode)
28136 {
28137 case QImode:
28138 case HImode:
28139 /* For narrow modes, we're going to perform the comparison in SImode,
28140 so do the zero-extension now. */
28141 rval = gen_reg_rtx (SImode);
28142 oldval = convert_modes (SImode, mode, oldval, true);
28143 /* FALLTHRU */
28144
28145 case SImode:
28146 /* Force the value into a register if needed. We waited until after
28147 the zero-extension above to do this properly. */
28148 if (!arm_add_operand (oldval, SImode))
28149 oldval = force_reg (SImode, oldval);
28150 break;
28151
28152 case DImode:
28153 if (!cmpdi_operand (oldval, mode))
28154 oldval = force_reg (mode, oldval);
28155 break;
28156
28157 default:
28158 gcc_unreachable ();
28159 }
28160
28161 switch (mode)
28162 {
28163 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28164 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28165 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28166 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28167 default:
28168 gcc_unreachable ();
28169 }
28170
28171 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
28172 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28173
28174 if (mode == QImode || mode == HImode)
28175 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28176
28177 /* In all cases, we arrange for success to be signaled by Z set.
28178 This arrangement allows for the boolean result to be used directly
28179 in a subsequent branch, post optimization.  For Thumb-1 targets, the
28180 boolean negation of the result is also stored in bval because the
28181 Thumb-1 backend lacks dependency tracking for the CC flag, as
28182 flag-setting is not represented at the RTL level. */
28183 if (TARGET_THUMB1)
28184 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28185 else
28186 {
28187 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28188 emit_insn (gen_rtx_SET (bval, x));
28189 }
28190 }
28191
28192 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28193 another memory store between the load-exclusive and store-exclusive can
28194 reset the monitor from Exclusive to Open state. This means we must wait
28195 until after reload to split the pattern, lest we get a register spill in
28196 the middle of the atomic sequence. Success of the compare and swap is
28197 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28198 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28199 by the atomic_compare_and_swapmode standard pattern in operand 0). */
28200
28201 void
28202 arm_split_compare_and_swap (rtx operands[])
28203 {
28204 rtx rval, mem, oldval, newval, neg_bval;
28205 machine_mode mode;
28206 enum memmodel mod_s, mod_f;
28207 bool is_weak;
28208 rtx_code_label *label1, *label2;
28209 rtx x, cond;
28210
28211 rval = operands[1];
28212 mem = operands[2];
28213 oldval = operands[3];
28214 newval = operands[4];
28215 is_weak = (operands[5] != const0_rtx);
28216 mod_s = memmodel_from_int (INTVAL (operands[6]));
28217 mod_f = memmodel_from_int (INTVAL (operands[7]));
28218 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28219 mode = GET_MODE (mem);
28220
28221 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28222
28223 bool use_acquire = TARGET_HAVE_LDACQ
28224 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28225 || is_mm_release (mod_s));
28226
28227 bool use_release = TARGET_HAVE_LDACQ
28228 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28229 || is_mm_acquire (mod_s));
28230
28231 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28232 a full barrier is emitted after the store-release. */
28233 if (is_armv8_sync)
28234 use_acquire = false;
28235
28236 /* Checks whether a barrier is needed and emits one accordingly. */
28237 if (!(use_acquire || use_release))
28238 arm_pre_atomic_barrier (mod_s);
28239
28240 label1 = NULL;
28241 if (!is_weak)
28242 {
28243 label1 = gen_label_rtx ();
28244 emit_label (label1);
28245 }
28246 label2 = gen_label_rtx ();
28247
28248 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28249
28250 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28251 as required to communicate with arm_expand_compare_and_swap. */
28252 if (TARGET_32BIT)
28253 {
28254 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28255 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28256 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28257 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28258 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28259 }
28260 else
28261 {
28262 emit_move_insn (neg_bval, const1_rtx);
28263 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28264 if (thumb1_cmpneg_operand (oldval, SImode))
28265 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28266 label2, cond));
28267 else
28268 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28269 }
28270
28271 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28272
28273 /* Weak or strong, we want EQ to be true for success, so that we
28274 match the flags that we got from the compare above. */
28275 if (TARGET_32BIT)
28276 {
28277 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28278 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28279 emit_insn (gen_rtx_SET (cond, x));
28280 }
28281
28282 if (!is_weak)
28283 {
28284 /* Z is set to boolean value of !neg_bval, as required to communicate
28285 with arm_expand_compare_and_swap. */
28286 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28287 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28288 }
28289
28290 if (!is_mm_relaxed (mod_f))
28291 emit_label (label2);
28292
28293 /* Checks whether a barrier is needed and emits one accordingly. */
28294 if (is_armv8_sync
28295 || !(use_acquire || use_release))
28296 arm_post_atomic_barrier (mod_s);
28297
28298 if (is_mm_relaxed (mod_f))
28299 emit_label (label2);
28300 }
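
/* For a strong SImode compare-and-swap on a 32-bit target without
   acquire/release instructions, the split above typically ends up as
   something like the following (registers are illustrative, and the
   barriers depend on the memory model):

       dmb   ish
     1:
       ldrex   r0, [r2]
       cmp     r0, r3
       bne     2f
       strex   r1, r4, [r2]
       cmp     r1, #0
       bne     1b
     2:
       dmb   ish

   with success signalled by the Z flag, as described in
   arm_expand_compare_and_swap.  */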
28301
28302 /* Split an atomic operation pattern. Operation is given by CODE and is one
28303 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28304 operation). Operation is performed on the content at MEM and on VALUE
28305 following the memory model MODEL_RTX. The content at MEM before and after
28306 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28307 success of the operation is returned in COND. Using a scratch register or
28308 an operand register for these determines what result is returned for that
28309 pattern. */
28310
28311 void
28312 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28313 rtx value, rtx model_rtx, rtx cond)
28314 {
28315 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28316 machine_mode mode = GET_MODE (mem);
28317 machine_mode wmode = (mode == DImode ? DImode : SImode);
28318 rtx_code_label *label;
28319 bool all_low_regs, bind_old_new;
28320 rtx x;
28321
28322 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28323
28324 bool use_acquire = TARGET_HAVE_LDACQ
28325 && !(is_mm_relaxed (model) || is_mm_consume (model)
28326 || is_mm_release (model));
28327
28328 bool use_release = TARGET_HAVE_LDACQ
28329 && !(is_mm_relaxed (model) || is_mm_consume (model)
28330 || is_mm_acquire (model));
28331
28332 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28333 a full barrier is emitted after the store-release. */
28334 if (is_armv8_sync)
28335 use_acquire = false;
28336
28337 /* Checks whether a barrier is needed and emits one accordingly. */
28338 if (!(use_acquire || use_release))
28339 arm_pre_atomic_barrier (model);
28340
28341 label = gen_label_rtx ();
28342 emit_label (label);
28343
28344 if (new_out)
28345 new_out = gen_lowpart (wmode, new_out);
28346 if (old_out)
28347 old_out = gen_lowpart (wmode, old_out);
28348 else
28349 old_out = new_out;
28350 value = simplify_gen_subreg (wmode, value, mode, 0);
28351
28352 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28353
28354 /* Does the operation require destination and first operand to use the same
28355 register? This is decided by register constraints of relevant insn
28356 patterns in thumb1.md. */
28357 gcc_assert (!new_out || REG_P (new_out));
28358 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28359 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28360 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28361 bind_old_new =
28362 (TARGET_THUMB1
28363 && code != SET
28364 && code != MINUS
28365 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28366
28367 /* We want to return the old value while putting the result of the operation
28368 in the same register as the old value so copy the old value over to the
28369 destination register and use that register for the operation. */
28370 if (old_out && bind_old_new)
28371 {
28372 emit_move_insn (new_out, old_out);
28373 old_out = new_out;
28374 }
28375
28376 switch (code)
28377 {
28378 case SET:
28379 new_out = value;
28380 break;
28381
28382 case NOT:
28383 x = gen_rtx_AND (wmode, old_out, value);
28384 emit_insn (gen_rtx_SET (new_out, x));
28385 x = gen_rtx_NOT (wmode, new_out);
28386 emit_insn (gen_rtx_SET (new_out, x));
28387 break;
28388
28389 case MINUS:
28390 if (CONST_INT_P (value))
28391 {
28392 value = GEN_INT (-INTVAL (value));
28393 code = PLUS;
28394 }
28395 /* FALLTHRU */
28396
28397 case PLUS:
28398 if (mode == DImode)
28399 {
28400 /* DImode plus/minus need to clobber flags. */
28401 /* The adddi3 and subdi3 patterns are incorrectly written so that
28402 they require matching operands, even when we could easily support
28403 three operands. Thankfully, this can be fixed up post-splitting,
28404 as the individual add+adc patterns do accept three operands and
28405 post-reload cprop can make these moves go away. */
28406 emit_move_insn (new_out, old_out);
28407 if (code == PLUS)
28408 x = gen_adddi3 (new_out, new_out, value);
28409 else
28410 x = gen_subdi3 (new_out, new_out, value);
28411 emit_insn (x);
28412 break;
28413 }
28414 /* FALLTHRU */
28415
28416 default:
28417 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28418 emit_insn (gen_rtx_SET (new_out, x));
28419 break;
28420 }
28421
28422 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28423 use_release);
28424
28425 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28426 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28427
28428 /* Checks whether a barrier is needed and emits one accordingly. */
28429 if (is_armv8_sync
28430 || !(use_acquire || use_release))
28431 arm_post_atomic_barrier (model);
28432 }
28433 \f
28434 #define MAX_VECT_LEN 16
28435
28436 struct expand_vec_perm_d
28437 {
28438 rtx target, op0, op1;
28439 unsigned char perm[MAX_VECT_LEN];
28440 machine_mode vmode;
28441 unsigned char nelt;
28442 bool one_vector_p;
28443 bool testing_p;
28444 };
28445
28446 /* Generate a variable permutation. */
28447
28448 static void
28449 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28450 {
28451 machine_mode vmode = GET_MODE (target);
28452 bool one_vector_p = rtx_equal_p (op0, op1);
28453
28454 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28455 gcc_checking_assert (GET_MODE (op0) == vmode);
28456 gcc_checking_assert (GET_MODE (op1) == vmode);
28457 gcc_checking_assert (GET_MODE (sel) == vmode);
28458 gcc_checking_assert (TARGET_NEON);
28459
28460 if (one_vector_p)
28461 {
28462 if (vmode == V8QImode)
28463 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28464 else
28465 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28466 }
28467 else
28468 {
28469 rtx pair;
28470
28471 if (vmode == V8QImode)
28472 {
28473 pair = gen_reg_rtx (V16QImode);
28474 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28475 pair = gen_lowpart (TImode, pair);
28476 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28477 }
28478 else
28479 {
28480 pair = gen_reg_rtx (OImode);
28481 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28482 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28483 }
28484 }
28485 }
28486
28487 void
28488 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28489 {
28490 machine_mode vmode = GET_MODE (target);
28491 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28492 bool one_vector_p = rtx_equal_p (op0, op1);
28493 rtx rmask[MAX_VECT_LEN], mask;
28494
28495 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28496 numbering of elements for big-endian, we must reverse the order. */
28497 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28498
28499 /* The VTBL instruction does not use a modulo index, so we must take care
28500 of that ourselves. */
28501 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28502 for (i = 0; i < nelt; ++i)
28503 rmask[i] = mask;
28504 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28505 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28506
28507 arm_expand_vec_perm_1 (target, op0, op1, sel);
28508 }
28509
28510 /* Map lane ordering between architectural lane order, and GCC lane order,
28511 taking into account ABI. See comment above output_move_neon for details. */
28512
28513 static int
28514 neon_endian_lane_map (machine_mode mode, int lane)
28515 {
28516 if (BYTES_BIG_ENDIAN)
28517 {
28518 int nelems = GET_MODE_NUNITS (mode);
28519 /* Reverse lane order. */
28520 lane = (nelems - 1 - lane);
28521 /* Reverse D register order, to match ABI. */
28522 if (GET_MODE_SIZE (mode) == 16)
28523 lane = lane ^ (nelems / 2);
28524 }
28525 return lane;
28526 }
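
/* For example, for V4SImode on a big-endian target, architectural lanes
   0, 1, 2, 3 map to GCC lanes 1, 0, 3, 2: the lane order is first
   reversed to 3, 2, 1, 0 and then, because the mode is 16 bytes wide,
   xor-ing with nelems / 2 swaps the two D registers back into ABI order.
   For an 8-byte mode such as V2SImode only the reversal applies, and on
   little-endian targets the mapping is the identity.  */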
28527
28528 /* Some permutations index into pairs of vectors; this is a helper function
28529 to map indexes into those pairs of vectors. */
28530
28531 static int
28532 neon_pair_endian_lane_map (machine_mode mode, int lane)
28533 {
28534 int nelem = GET_MODE_NUNITS (mode);
28535 if (BYTES_BIG_ENDIAN)
28536 lane =
28537 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28538 return lane;
28539 }
28540
28541 /* Generate or test for an insn that supports a constant permutation. */
28542
28543 /* Recognize patterns for the VUZP insns. */
28544
28545 static bool
28546 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28547 {
28548 unsigned int i, odd, mask, nelt = d->nelt;
28549 rtx out0, out1, in0, in1;
28550 rtx (*gen)(rtx, rtx, rtx, rtx);
28551 int first_elem;
28552 int swap_nelt;
28553
28554 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28555 return false;
28556
28557 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28558 big endian pattern on 64 bit vectors, so we correct for that. */
28559 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28560 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28561
28562 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28563
28564 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28565 odd = 0;
28566 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28567 odd = 1;
28568 else
28569 return false;
28570 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28571
28572 for (i = 0; i < nelt; i++)
28573 {
28574 unsigned elt =
28575 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28576 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28577 return false;
28578 }
28579
28580 /* Success! */
28581 if (d->testing_p)
28582 return true;
28583
28584 switch (d->vmode)
28585 {
28586 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28587 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28588 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28589 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28590 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28591 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28592 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28593 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28594 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28595 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28596 default:
28597 gcc_unreachable ();
28598 }
28599
28600 in0 = d->op0;
28601 in1 = d->op1;
28602 if (swap_nelt != 0)
28603 std::swap (in0, in1);
28604
28605 out0 = d->target;
28606 out1 = gen_reg_rtx (d->vmode);
28607 if (odd)
28608 std::swap (out0, out1);
28609
28610 emit_insn (gen (out0, in0, in1, out1));
28611 return true;
28612 }
28613
28614 /* Recognize patterns for the VZIP insns. */
28615
28616 static bool
28617 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28618 {
28619 unsigned int i, high, mask, nelt = d->nelt;
28620 rtx out0, out1, in0, in1;
28621 rtx (*gen)(rtx, rtx, rtx, rtx);
28622 int first_elem;
28623 bool is_swapped;
28624
28625 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28626 return false;
28627
28628 is_swapped = BYTES_BIG_ENDIAN;
28629
28630 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28631
28632 high = nelt / 2;
28633 if (first_elem == neon_endian_lane_map (d->vmode, high))
28634 ;
28635 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28636 high = 0;
28637 else
28638 return false;
28639 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28640
28641 for (i = 0; i < nelt / 2; i++)
28642 {
28643 unsigned elt =
28644 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28645 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28646 != elt)
28647 return false;
28648 elt =
28649 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28650 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28651 != elt)
28652 return false;
28653 }
28654
28655 /* Success! */
28656 if (d->testing_p)
28657 return true;
28658
28659 switch (d->vmode)
28660 {
28661 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28662 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28663 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28664 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28665 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28666 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28667 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28668 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28669 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28670 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28671 default:
28672 gcc_unreachable ();
28673 }
28674
28675 in0 = d->op0;
28676 in1 = d->op1;
28677 if (is_swapped)
28678 std::swap (in0, in1);
28679
28680 out0 = d->target;
28681 out1 = gen_reg_rtx (d->vmode);
28682 if (high)
28683 std::swap (out0, out1);
28684
28685 emit_insn (gen (out0, in0, in1, out1));
28686 return true;
28687 }
28688
28689 /* Recognize patterns for the VREV insns. */
28690
28691 static bool
28692 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28693 {
28694 unsigned int i, j, diff, nelt = d->nelt;
28695 rtx (*gen)(rtx, rtx);
28696
28697 if (!d->one_vector_p)
28698 return false;
28699
28700 diff = d->perm[0];
28701 switch (diff)
28702 {
28703 case 7:
28704 switch (d->vmode)
28705 {
28706 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28707 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28708 default:
28709 return false;
28710 }
28711 break;
28712 case 3:
28713 switch (d->vmode)
28714 {
28715 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28716 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28717 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28718 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28719 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28720 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28721 default:
28722 return false;
28723 }
28724 break;
28725 case 1:
28726 switch (d->vmode)
28727 {
28728 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28729 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28730 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28731 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28732 case V4SImode: gen = gen_neon_vrev64v4si; break;
28733 case V2SImode: gen = gen_neon_vrev64v2si; break;
28734 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28735 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28736 default:
28737 return false;
28738 }
28739 break;
28740 default:
28741 return false;
28742 }
28743
28744 for (i = 0; i < nelt ; i += diff + 1)
28745 for (j = 0; j <= diff; j += 1)
28746 {
28747 /* This is guaranteed to be true as the value of diff
28748 is 7, 3, 1 and we should have enough elements in the
28749 queue to generate this. Getting a vector mask with a
28750 value of diff other than these values implies that
28751 something is wrong by the time we get here. */
28752 gcc_assert (i + j < nelt);
28753 if (d->perm[i + j] != i + diff - j)
28754 return false;
28755 }
28756
28757 /* Success! */
28758 if (d->testing_p)
28759 return true;
28760
28761 emit_insn (gen (d->target, d->op0));
28762 return true;
28763 }
28764
28765 /* Recognize patterns for the VTRN insns. */
28766
28767 static bool
28768 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28769 {
28770 unsigned int i, odd, mask, nelt = d->nelt;
28771 rtx out0, out1, in0, in1;
28772 rtx (*gen)(rtx, rtx, rtx, rtx);
28773
28774 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28775 return false;
28776
28777 /* Note that these are little-endian tests. Adjust for big-endian later. */
28778 if (d->perm[0] == 0)
28779 odd = 0;
28780 else if (d->perm[0] == 1)
28781 odd = 1;
28782 else
28783 return false;
28784 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28785
28786 for (i = 0; i < nelt; i += 2)
28787 {
28788 if (d->perm[i] != i + odd)
28789 return false;
28790 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28791 return false;
28792 }
28793
28794 /* Success! */
28795 if (d->testing_p)
28796 return true;
28797
28798 switch (d->vmode)
28799 {
28800 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28801 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28802 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28803 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28804 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28805 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28806 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28807 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28808 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28809 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28810 default:
28811 gcc_unreachable ();
28812 }
28813
28814 in0 = d->op0;
28815 in1 = d->op1;
28816 if (BYTES_BIG_ENDIAN)
28817 {
28818 std::swap (in0, in1);
28819 odd = !odd;
28820 }
28821
28822 out0 = d->target;
28823 out1 = gen_reg_rtx (d->vmode);
28824 if (odd)
28825 std::swap (out0, out1);
28826
28827 emit_insn (gen (out0, in0, in1, out1));
28828 return true;
28829 }
28830
28831 /* Recognize patterns for the VEXT insns. */
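/* VEXT extracts a run of consecutive elements starting at an arbitrary
   position in the concatenation of the two inputs.  For example, on
   V8QImode the selector { 3, 4, 5, 6, 7, 8, 9, 10 } is matched here and
   emitted as a vext with an offset of 3.  */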
28832
28833 static bool
28834 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28835 {
28836 unsigned int i, nelt = d->nelt;
28837 rtx (*gen) (rtx, rtx, rtx, rtx);
28838 rtx offset;
28839
28840 unsigned int location;
28841
28842 unsigned int next = d->perm[0] + 1;
28843
28844 /* TODO: Handle GCC's numbering of elements for big-endian. */
28845 if (BYTES_BIG_ENDIAN)
28846 return false;
28847
28848 /* Check if the extracted indexes are increasing by one. */
28849 for (i = 1; i < nelt; next++, i++)
28850 {
28851 /* If we hit the most significant element of the 2nd vector in
28852 the previous iteration, no need to test further. */
28853 if (next == 2 * nelt)
28854 return false;
28855
28856 /* If we are operating on only one vector: it could be a
28857 rotation. If there are only two elements of size < 64, let
28858 arm_evpc_neon_vrev catch it. */
28859 if (d->one_vector_p && (next == nelt))
28860 {
28861 if ((nelt == 2) && (d->vmode != V2DImode))
28862 return false;
28863 else
28864 next = 0;
28865 }
28866
28867 if (d->perm[i] != next)
28868 return false;
28869 }
28870
28871 location = d->perm[0];
28872
28873 switch (d->vmode)
28874 {
28875 case V16QImode: gen = gen_neon_vextv16qi; break;
28876 case V8QImode: gen = gen_neon_vextv8qi; break;
28877 case V4HImode: gen = gen_neon_vextv4hi; break;
28878 case V8HImode: gen = gen_neon_vextv8hi; break;
28879 case V2SImode: gen = gen_neon_vextv2si; break;
28880 case V4SImode: gen = gen_neon_vextv4si; break;
28881 case V4HFmode: gen = gen_neon_vextv4hf; break;
28882 case V8HFmode: gen = gen_neon_vextv8hf; break;
28883 case V2SFmode: gen = gen_neon_vextv2sf; break;
28884 case V4SFmode: gen = gen_neon_vextv4sf; break;
28885 case V2DImode: gen = gen_neon_vextv2di; break;
28886 default:
28887 return false;
28888 }
28889
28890 /* Success! */
28891 if (d->testing_p)
28892 return true;
28893
28894 offset = GEN_INT (location);
28895 emit_insn (gen (d->target, d->op0, d->op1, offset));
28896 return true;
28897 }
28898
28899 /* The NEON VTBL instruction is a fully variable permutation that's even
28900 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28901 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28902 can do slightly better by expanding this as a constant where we don't
28903 have to apply a mask. */
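/* As a rough sketch: for a V8QImode permutation such as
   { 1, 0, 3, 2, 5, 4, 7, 6 } we simply materialize that index vector as a
   constant, load it into a register, and hand it to arm_expand_vec_perm_1,
   which takes care of the actual vtbl-based expansion.  */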
28904
28905 static bool
28906 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28907 {
28908 rtx rperm[MAX_VECT_LEN], sel;
28909 machine_mode vmode = d->vmode;
28910 unsigned int i, nelt = d->nelt;
28911
28912 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28913 numbering of elements for big-endian, we must reverse the order. */
28914 if (BYTES_BIG_ENDIAN)
28915 return false;
28916
28917 if (d->testing_p)
28918 return true;
28919
28920 /* Generic code will try constant permutation twice. Once with the
28921 original mode and again with the elements lowered to QImode.
28922 So wait and don't do the selector expansion ourselves. */
28923 if (vmode != V8QImode && vmode != V16QImode)
28924 return false;
28925
28926 for (i = 0; i < nelt; ++i)
28927 rperm[i] = GEN_INT (d->perm[i]);
28928 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28929 sel = force_reg (vmode, sel);
28930
28931 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28932 return true;
28933 }
28934
28935 static bool
28936 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28937 {
28938 /* Check if the input mask matches vext before reordering the
28939 operands. */
28940 if (TARGET_NEON)
28941 if (arm_evpc_neon_vext (d))
28942 return true;
28943
28944 /* The pattern matching functions above are written to look for a small
28945 number to begin the sequence (0, 1, N/2). If we begin with an index
28946 from the second operand, we can swap the operands. */
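/* For example, with nelt == 4 a selector of { 6, 7, 0, 1 } starts in the
   second operand; remapping each index modulo 2 * nelt gives { 2, 3, 4, 5 }
   once op0 and op1 have been swapped, which the matchers below can handle.  */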
28947 if (d->perm[0] >= d->nelt)
28948 {
28949 unsigned i, nelt = d->nelt;
28950
28951 for (i = 0; i < nelt; ++i)
28952 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28953
28954 std::swap (d->op0, d->op1);
28955 }
28956
28957 if (TARGET_NEON)
28958 {
28959 if (arm_evpc_neon_vuzp (d))
28960 return true;
28961 if (arm_evpc_neon_vzip (d))
28962 return true;
28963 if (arm_evpc_neon_vrev (d))
28964 return true;
28965 if (arm_evpc_neon_vtrn (d))
28966 return true;
28967 return arm_evpc_neon_vtbl (d);
28968 }
28969 return false;
28970 }
28971
28972 /* Expand a vec_perm_const pattern. */
28973
28974 bool
28975 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28976 {
28977 struct expand_vec_perm_d d;
28978 int i, nelt, which;
28979
28980 d.target = target;
28981 d.op0 = op0;
28982 d.op1 = op1;
28983
28984 d.vmode = GET_MODE (target);
28985 gcc_assert (VECTOR_MODE_P (d.vmode));
28986 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28987 d.testing_p = false;
28988
28989 for (i = which = 0; i < nelt; ++i)
28990 {
28991 rtx e = XVECEXP (sel, 0, i);
28992 int ei = INTVAL (e) & (2 * nelt - 1);
28993 which |= (ei < nelt ? 1 : 2);
28994 d.perm[i] = ei;
28995 }
28996
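/* WHICH is a two-bit mask describing which inputs the selector actually
   references: bit 0 is set if any index selects from OP0 and bit 1 if any
   index selects from OP1.  */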
28997 switch (which)
28998 {
28999 default:
29000 gcc_unreachable ();
29001
29002 case 3:
29003 d.one_vector_p = false;
29004 if (!rtx_equal_p (op0, op1))
29005 break;
29006
29007 /* The elements of PERM do not suggest that only the first operand
29008 is used, but both operands are identical. Allow easier matching
29009 of the permutation by folding the permutation into the single
29010 input vector. */
29011 /* FALLTHRU */
29012 case 2:
29013 for (i = 0; i < nelt; ++i)
29014 d.perm[i] &= nelt - 1;
29015 d.op0 = op1;
29016 d.one_vector_p = true;
29017 break;
29018
29019 case 1:
29020 d.op1 = op0;
29021 d.one_vector_p = true;
29022 break;
29023 }
29024
29025 return arm_expand_vec_perm_const_1 (&d);
29026 }
29027
29028 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29029
29030 static bool
29031 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29032 const unsigned char *sel)
29033 {
29034 struct expand_vec_perm_d d;
29035 unsigned int i, nelt, which;
29036 bool ret;
29037
29038 d.vmode = vmode;
29039 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29040 d.testing_p = true;
29041 memcpy (d.perm, sel, nelt);
29042
29043 /* Categorize the set of elements in the selector. */
29044 for (i = which = 0; i < nelt; ++i)
29045 {
29046 unsigned char e = d.perm[i];
29047 gcc_assert (e < 2 * nelt);
29048 which |= (e < nelt ? 1 : 2);
29049 }
29050
29051 /* If all elements come from the second vector, fold them onto the first. */
29052 if (which == 2)
29053 for (i = 0; i < nelt; ++i)
29054 d.perm[i] -= nelt;
29055
29056 /* Check whether the mask can be applied to the vector type. */
29057 d.one_vector_p = (which != 3);
29058
29059 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29060 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29061 if (!d.one_vector_p)
29062 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29063
29064 start_sequence ();
29065 ret = arm_expand_vec_perm_const_1 (&d);
29066 end_sequence ();
29067
29068 return ret;
29069 }
29070
29071 bool
29072 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29073 {
29074 /* If we are soft float and either have ldrd or the mode fits in a
29075 single word, then all auto increment forms are ok. */
29076 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29077 return true;
29078
29079 switch (code)
29080 {
29081 /* Post-increment is supported for all instruction forms;
29082 pre-decrement is supported for all but the vector forms. */
29083 case ARM_POST_INC:
29084 case ARM_PRE_DEC:
29085 if (VECTOR_MODE_P (mode))
29086 {
29087 if (code != ARM_PRE_DEC)
29088 return true;
29089 else
29090 return false;
29091 }
29092
29093 return true;
29094
29095 case ARM_POST_DEC:
29096 case ARM_PRE_INC:
29097 /* Without LDRD, and with a mode size greater than the
29098 word size, there is no point in auto-incrementing
29099 because ldm and stm will not have these forms. */
29100 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29101 return false;
29102
29103 /* Vector and floating point modes do not support
29104 these auto increment forms. */
29105 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29106 return false;
29107
29108 return true;
29109
29110 default:
29111 return false;
29112
29113 }
29114
29115 return false;
29116 }
29117
29118 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29119 on ARM, since we know that shifts by negative amounts are no-ops.
29120 Additionally, the default expansion code is not available or suitable
29121 for post-reload insn splits (this can occur when the register allocator
29122 chooses not to do a shift in NEON).
29123
29124 This function is used in both initial expand and post-reload splits, and
29125 handles all kinds of 64-bit shifts.
29126
29127 Input requirements:
29128 - It is safe for the input and output to be the same register, but
29129 early-clobber rules apply for the shift amount and scratch registers.
29130 - Shift by register requires both scratch registers. In all other cases
29131 the scratch registers may be NULL.
29132 - Ashiftrt by a register also clobbers the CC register. */
29133 void
29134 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29135 rtx amount, rtx scratch1, rtx scratch2)
29136 {
29137 rtx out_high = gen_highpart (SImode, out);
29138 rtx out_low = gen_lowpart (SImode, out);
29139 rtx in_high = gen_highpart (SImode, in);
29140 rtx in_low = gen_lowpart (SImode, in);
29141
29142 /* Terminology:
29143 in = the register pair containing the input value.
29144 out = the destination register pair.
29145 up = the high- or low-part of each pair.
29146 down = the opposite part to "up".
29147 In a shift, we can consider bits to shift from "up"-stream to
29148 "down"-stream, so in a left-shift "up" is the low-part and "down"
29149 is the high-part of each register pair. */
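/* For instance, a logical right shift (LSHIFTRT) by a constant 40 reduces,
   in the code below, to out_low = in_high >> 8 and out_high = 0, while an
   arithmetic right shift by 40 instead sets out_high = in_high >> 31 to
   replicate the sign bit.  */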
29150
29151 rtx out_up = code == ASHIFT ? out_low : out_high;
29152 rtx out_down = code == ASHIFT ? out_high : out_low;
29153 rtx in_up = code == ASHIFT ? in_low : in_high;
29154 rtx in_down = code == ASHIFT ? in_high : in_low;
29155
29156 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29157 gcc_assert (out
29158 && (REG_P (out) || GET_CODE (out) == SUBREG)
29159 && GET_MODE (out) == DImode);
29160 gcc_assert (in
29161 && (REG_P (in) || GET_CODE (in) == SUBREG)
29162 && GET_MODE (in) == DImode);
29163 gcc_assert (amount
29164 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29165 && GET_MODE (amount) == SImode)
29166 || CONST_INT_P (amount)));
29167 gcc_assert (scratch1 == NULL
29168 || (GET_CODE (scratch1) == SCRATCH)
29169 || (GET_MODE (scratch1) == SImode
29170 && REG_P (scratch1)));
29171 gcc_assert (scratch2 == NULL
29172 || (GET_CODE (scratch2) == SCRATCH)
29173 || (GET_MODE (scratch2) == SImode
29174 && REG_P (scratch2)));
29175 gcc_assert (!REG_P (out) || !REG_P (amount)
29176 || !HARD_REGISTER_P (out)
29177 || (REGNO (out) != REGNO (amount)
29178 && REGNO (out) + 1 != REGNO (amount)));
29179
29180 /* Macros to make following code more readable. */
29181 #define SUB_32(DEST,SRC) \
29182 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29183 #define RSB_32(DEST,SRC) \
29184 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29185 #define SUB_S_32(DEST,SRC) \
29186 gen_addsi3_compare0 ((DEST), (SRC), \
29187 GEN_INT (-32))
29188 #define SET(DEST,SRC) \
29189 gen_rtx_SET ((DEST), (SRC))
29190 #define SHIFT(CODE,SRC,AMOUNT) \
29191 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29192 #define LSHIFT(CODE,SRC,AMOUNT) \
29193 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29194 SImode, (SRC), (AMOUNT))
29195 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29196 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29197 SImode, (SRC), (AMOUNT))
29198 #define ORR(A,B) \
29199 gen_rtx_IOR (SImode, (A), (B))
29200 #define BRANCH(COND,LABEL) \
29201 gen_arm_cond_branch ((LABEL), \
29202 gen_rtx_ ## COND (CCmode, cc_reg, \
29203 const0_rtx), \
29204 cc_reg)
29205
29206 /* Shifts by register and shifts by constant are handled separately. */
29207 if (CONST_INT_P (amount))
29208 {
29209 /* We have a shift-by-constant. */
29210
29211 /* First, handle out-of-range shift amounts.
29212 In both cases we try to match the result that an ARM instruction in
29213 a shift-by-register would give. This helps reduce execution
29214 differences between optimization levels, but it won't stop other
29215 parts of the compiler from doing different things. This is
29216 "undefined" behavior, in any case. */
29217 if (INTVAL (amount) <= 0)
29218 emit_insn (gen_movdi (out, in));
29219 else if (INTVAL (amount) >= 64)
29220 {
29221 if (code == ASHIFTRT)
29222 {
29223 rtx const31_rtx = GEN_INT (31);
29224 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29225 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29226 }
29227 else
29228 emit_insn (gen_movdi (out, const0_rtx));
29229 }
29230
29231 /* Now handle valid shifts. */
29232 else if (INTVAL (amount) < 32)
29233 {
29234 /* Shifts by a constant less than 32. */
29235 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29236
29237 /* Clearing the out register in DImode first avoids lots
29238 of spilling and results in less stack usage.
29239 Later this redundant insn is completely removed.
29240 Do that only if "in" and "out" are different registers. */
29241 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29242 emit_insn (SET (out, const0_rtx));
29243 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29244 emit_insn (SET (out_down,
29245 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29246 out_down)));
29247 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29248 }
29249 else
29250 {
29251 /* Shifts by a constant greater than 31. */
29252 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29253
29254 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29255 emit_insn (SET (out, const0_rtx));
29256 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29257 if (code == ASHIFTRT)
29258 emit_insn (gen_ashrsi3 (out_up, in_up,
29259 GEN_INT (31)));
29260 else
29261 emit_insn (SET (out_up, const0_rtx));
29262 }
29263 }
29264 else
29265 {
29266 /* We have a shift-by-register. */
29267 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29268
29269 /* This alternative requires the scratch registers. */
29270 gcc_assert (scratch1 && REG_P (scratch1));
29271 gcc_assert (scratch2 && REG_P (scratch2));
29272
29273 /* We will need the values "amount-32" and "32-amount" later.
29274 Swapping them around now allows the later code to be more general. */
29275 switch (code)
29276 {
29277 case ASHIFT:
29278 emit_insn (SUB_32 (scratch1, amount));
29279 emit_insn (RSB_32 (scratch2, amount));
29280 break;
29281 case ASHIFTRT:
29282 emit_insn (RSB_32 (scratch1, amount));
29283 /* Also set CC = amount > 32. */
29284 emit_insn (SUB_S_32 (scratch2, amount));
29285 break;
29286 case LSHIFTRT:
29287 emit_insn (RSB_32 (scratch1, amount));
29288 emit_insn (SUB_32 (scratch2, amount));
29289 break;
29290 default:
29291 gcc_unreachable ();
29292 }
29293
29294 /* Emit code like this:
29295
29296 arithmetic-left:
29297 out_down = in_down << amount;
29298 out_down = (in_up << (amount - 32)) | out_down;
29299 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29300 out_up = in_up << amount;
29301
29302 arithmetic-right:
29303 out_down = in_down >> amount;
29304 out_down = (in_up << (32 - amount)) | out_down;
29305 if (amount < 32)
29306 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29307 out_up = in_up << amount;
29308
29309 logical-right:
29310 out_down = in_down >> amount;
29311 out_down = (in_up << (32 - amount)) | out_down;
29312 if (amount < 32)
29313 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29314 out_up = in_up << amount;
29315
29316 The ARM and Thumb2 variants are the same but implemented slightly
29317 differently. If this were only called during expand we could just
29318 use the Thumb2 case and let combine do the right thing, but this
29319 can also be called from post-reload splitters. */
29320
29321 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29322
29323 if (!TARGET_THUMB2)
29324 {
29325 /* Emit code for ARM mode. */
29326 emit_insn (SET (out_down,
29327 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29328 if (code == ASHIFTRT)
29329 {
29330 rtx_code_label *done_label = gen_label_rtx ();
29331 emit_jump_insn (BRANCH (LT, done_label));
29332 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29333 out_down)));
29334 emit_label (done_label);
29335 }
29336 else
29337 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29338 out_down)));
29339 }
29340 else
29341 {
29342 /* Emit code for Thumb2 mode.
29343 Thumb2 can't do shift and or in one insn. */
29344 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29345 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29346
29347 if (code == ASHIFTRT)
29348 {
29349 rtx_code_label *done_label = gen_label_rtx ();
29350 emit_jump_insn (BRANCH (LT, done_label));
29351 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29352 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29353 emit_label (done_label);
29354 }
29355 else
29356 {
29357 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29358 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29359 }
29360 }
29361
29362 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29363 }
29364
29365 #undef SUB_32
29366 #undef RSB_32
29367 #undef SUB_S_32
29368 #undef SET
29369 #undef SHIFT
29370 #undef LSHIFT
29371 #undef REV_LSHIFT
29372 #undef ORR
29373 #undef BRANCH
29374 }
29375
29376 /* Returns true if the pattern is a valid symbolic address, which is either a
29377 symbol_ref or (symbol_ref + addend).
29378
29379 According to the ARM ELF ABI, the initial addend of REL-type relocations
29380 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29381 literal field of the instruction as a 16-bit signed value in the range
29382 -32768 <= A < 32768. */
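/* For example, a reference to an arbitrary symbol "foo", either as
   (symbol_ref "foo") or as (const (plus (symbol_ref "foo") (const_int 256))),
   is accepted, whereas an addend of 0x10000 is rejected because it cannot
   be encoded in the 16-bit literal field.  */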
29383
29384 bool
29385 arm_valid_symbolic_address_p (rtx addr)
29386 {
29387 rtx xop0, xop1 = NULL_RTX;
29388 rtx tmp = addr;
29389
29390 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29391 return true;
29392
29393 /* (const (plus: symbol_ref const_int)) */
29394 if (GET_CODE (addr) == CONST)
29395 tmp = XEXP (addr, 0);
29396
29397 if (GET_CODE (tmp) == PLUS)
29398 {
29399 xop0 = XEXP (tmp, 0);
29400 xop1 = XEXP (tmp, 1);
29401
29402 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29403 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29404 }
29405
29406 return false;
29407 }
29408
29409 /* Return TRUE if *COMPARISON is a valid comparison operation, putting
29410 the operands into a form that the comparison patterns accept. */
29411 bool
29412 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29413 {
29414 enum rtx_code code = GET_CODE (*comparison);
29415 int code_int;
29416 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29417 ? GET_MODE (*op2) : GET_MODE (*op1);
29418
29419 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29420
29421 if (code == UNEQ || code == LTGT)
29422 return false;
29423
29424 code_int = (int)code;
29425 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29426 PUT_CODE (*comparison, (enum rtx_code)code_int);
29427
29428 switch (mode)
29429 {
29430 case SImode:
29431 if (!arm_add_operand (*op1, mode))
29432 *op1 = force_reg (mode, *op1);
29433 if (!arm_add_operand (*op2, mode))
29434 *op2 = force_reg (mode, *op2);
29435 return true;
29436
29437 case DImode:
29438 if (!cmpdi_operand (*op1, mode))
29439 *op1 = force_reg (mode, *op1);
29440 if (!cmpdi_operand (*op2, mode))
29441 *op2 = force_reg (mode, *op2);
29442 return true;
29443
29444 case HFmode:
29445 if (!TARGET_VFP_FP16INST)
29446 break;
29447 /* FP16 comparisons are done in SF mode. */
29448 mode = SFmode;
29449 *op1 = convert_to_mode (mode, *op1, 1);
29450 *op2 = convert_to_mode (mode, *op2, 1);
29451 /* Fall through. */
29452 case SFmode:
29453 case DFmode:
29454 if (!vfp_compare_operand (*op1, mode))
29455 *op1 = force_reg (mode, *op1);
29456 if (!vfp_compare_operand (*op2, mode))
29457 *op2 = force_reg (mode, *op2);
29458 return true;
29459 default:
29460 break;
29461 }
29462
29463 return false;
29464
29465 }
29466
29467 /* Maximum number of instructions to set a block of memory. */
29468 static int
29469 arm_block_set_max_insns (void)
29470 {
29471 if (optimize_function_for_size_p (cfun))
29472 return 4;
29473 else
29474 return current_tune->max_insns_inline_memset;
29475 }
29476
29477 /* Return TRUE if it's profitable to set a block of memory in the
29478 non-vectorized case. VAL is the value to set the memory
29479 with. LENGTH is the number of bytes to set. ALIGN is the
29480 alignment of the destination memory in bytes. UNALIGNED_P
29481 is TRUE if we can only set the memory with instructions
29482 meeting alignment requirements. USE_STRD_P is TRUE if we
29483 can use strd to set the memory. */
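/* As a worked example for the plain word-store path: LENGTH == 14 needs
   14 >> 2 == 3 str instructions plus leftover[14 & 3] == 1 strh, so the
   block is expanded inline only if that total plus the cost of loading the
   constant stays within arm_block_set_max_insns ().  */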
29484 static bool
29485 arm_block_set_non_vect_profit_p (rtx val,
29486 unsigned HOST_WIDE_INT length,
29487 unsigned HOST_WIDE_INT align,
29488 bool unaligned_p, bool use_strd_p)
29489 {
29490 int num = 0;
29491 /* For a leftover of 0-7 bytes, this table gives the minimum number
29492 of strb/strh/str instructions needed to store it. */
29493 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29494
29495 if (unaligned_p)
29496 {
29497 num = arm_const_inline_cost (SET, val);
29498 num += length / align + length % align;
29499 }
29500 else if (use_strd_p)
29501 {
29502 num = arm_const_double_inline_cost (val);
29503 num += (length >> 3) + leftover[length & 7];
29504 }
29505 else
29506 {
29507 num = arm_const_inline_cost (SET, val);
29508 num += (length >> 2) + leftover[length & 3];
29509 }
29510
29511 /* We may be able to combine last pair STRH/STRB into a single STR
29512 by shifting one byte back. */
29513 if (unaligned_access && length > 3 && (length & 3) == 3)
29514 num--;
29515
29516 return (num <= arm_block_set_max_insns ());
29517 }
29518
29519 /* Return TRUE if it's profitable to set a block of memory in the
29520 vectorized case. LENGTH is the number of bytes to set.
29521 ALIGN is the alignment of destination memory in bytes.
29522 MODE is the vector mode used to set the memory. */
29523 static bool
29524 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29525 unsigned HOST_WIDE_INT align,
29526 machine_mode mode)
29527 {
29528 int num;
29529 bool unaligned_p = ((align & 3) != 0);
29530 unsigned int nelt = GET_MODE_NUNITS (mode);
29531
29532 /* Instruction loading constant value. */
29533 num = 1;
29534 /* Instructions storing the memory. */
29535 num += (length + nelt - 1) / nelt;
29536 /* Instructions adjusting the address expression. We only need to
29537 adjust the address if the block is 4-byte aligned and the leftover
29538 bytes can only be stored by a misaligned store instruction. */
29539 if (!unaligned_p && (length & 3) != 0)
29540 num++;
29541
29542 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29543 if (!unaligned_p && mode == V16QImode)
29544 num--;
29545
29546 return (num <= arm_block_set_max_insns ());
29547 }
29548
29549 /* Set a block of memory using vectorization instructions for the
29550 unaligned case. We fill the first LENGTH bytes of the memory
29551 area starting from DSTBASE with byte constant VALUE. ALIGN is
29552 the alignment requirement of memory. Return TRUE if succeeded. */
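/* The strategy below is to store nelt_mode bytes at a time with misaligned
   vector stores (vst1), and then to handle any leftover bytes by moving the
   final store back so that it ends exactly at DSTBASE + LENGTH, overlapping
   bytes that have already been written.  */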
29553 static bool
29554 arm_block_set_unaligned_vect (rtx dstbase,
29555 unsigned HOST_WIDE_INT length,
29556 unsigned HOST_WIDE_INT value,
29557 unsigned HOST_WIDE_INT align)
29558 {
29559 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29560 rtx dst, mem;
29561 rtx val_elt, val_vec, reg;
29562 rtx rval[MAX_VECT_LEN];
29563 rtx (*gen_func) (rtx, rtx);
29564 machine_mode mode;
29565 unsigned HOST_WIDE_INT v = value;
29566 unsigned int offset = 0;
29567 gcc_assert ((align & 0x3) != 0);
29568 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29569 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29570 if (length >= nelt_v16)
29571 {
29572 mode = V16QImode;
29573 gen_func = gen_movmisalignv16qi;
29574 }
29575 else
29576 {
29577 mode = V8QImode;
29578 gen_func = gen_movmisalignv8qi;
29579 }
29580 nelt_mode = GET_MODE_NUNITS (mode);
29581 gcc_assert (length >= nelt_mode);
29582 /* Skip if it isn't profitable. */
29583 if (!arm_block_set_vect_profit_p (length, align, mode))
29584 return false;
29585
29586 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29587 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29588
29589 v = sext_hwi (v, BITS_PER_WORD);
29590 val_elt = GEN_INT (v);
29591 for (j = 0; j < nelt_mode; j++)
29592 rval[j] = val_elt;
29593
29594 reg = gen_reg_rtx (mode);
29595 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29596 /* Emit instruction loading the constant value. */
29597 emit_move_insn (reg, val_vec);
29598
29599 /* Handle nelt_mode bytes in a vector. */
29600 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29601 {
29602 emit_insn ((*gen_func) (mem, reg));
29603 if (i + 2 * nelt_mode <= length)
29604 {
29605 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29606 offset += nelt_mode;
29607 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29608 }
29609 }
29610
29611 /* If at least nelt_v8 bytes are left over, we must be operating
29612 in V16QI mode. */
29613 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29614
29615 /* Handle (8, 16) bytes leftover. */
29616 if (i + nelt_v8 < length)
29617 {
29618 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29619 offset += length - i;
29620 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29621
29622 /* We are shifting bytes back, set the alignment accordingly. */
29623 if ((length & 1) != 0 && align >= 2)
29624 set_mem_align (mem, BITS_PER_UNIT);
29625
29626 emit_insn (gen_movmisalignv16qi (mem, reg));
29627 }
29628 /* Handle (0, 8] bytes leftover. */
29629 else if (i < length && i + nelt_v8 >= length)
29630 {
29631 if (mode == V16QImode)
29632 reg = gen_lowpart (V8QImode, reg);
29633
29634 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29635 + (nelt_mode - nelt_v8))));
29636 offset += (length - i) + (nelt_mode - nelt_v8);
29637 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29638
29639 /* We are shifting bytes back, set the alignment accordingly. */
29640 if ((length & 1) != 0 && align >= 2)
29641 set_mem_align (mem, BITS_PER_UNIT);
29642
29643 emit_insn (gen_movmisalignv8qi (mem, reg));
29644 }
29645
29646 return true;
29647 }
29648
29649 /* Set a block of memory using vectorization instructions for the
29650 aligned case. We fill the first LENGTH bytes of the memory area
29651 starting from DSTBASE with byte constant VALUE. ALIGN is the
29652 alignment requirement of memory. Return TRUE if succeeded. */
29653 static bool
29654 arm_block_set_aligned_vect (rtx dstbase,
29655 unsigned HOST_WIDE_INT length,
29656 unsigned HOST_WIDE_INT value,
29657 unsigned HOST_WIDE_INT align)
29658 {
29659 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29660 rtx dst, addr, mem;
29661 rtx val_elt, val_vec, reg;
29662 rtx rval[MAX_VECT_LEN];
29663 machine_mode mode;
29664 unsigned HOST_WIDE_INT v = value;
29665 unsigned int offset = 0;
29666
29667 gcc_assert ((align & 0x3) == 0);
29668 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29669 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29670 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29671 mode = V16QImode;
29672 else
29673 mode = V8QImode;
29674
29675 nelt_mode = GET_MODE_NUNITS (mode);
29676 gcc_assert (length >= nelt_mode);
29677 /* Skip if it isn't profitable. */
29678 if (!arm_block_set_vect_profit_p (length, align, mode))
29679 return false;
29680
29681 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29682
29683 v = sext_hwi (v, BITS_PER_WORD);
29684 val_elt = GEN_INT (v);
29685 for (j = 0; j < nelt_mode; j++)
29686 rval[j] = val_elt;
29687
29688 reg = gen_reg_rtx (mode);
29689 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29690 /* Emit instruction loading the constant value. */
29691 emit_move_insn (reg, val_vec);
29692
29693 i = 0;
29694 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29695 if (mode == V16QImode)
29696 {
29697 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29698 emit_insn (gen_movmisalignv16qi (mem, reg));
29699 i += nelt_mode;
29700 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29701 if (i + nelt_v8 < length && i + nelt_v16 > length)
29702 {
29703 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29704 offset += length - nelt_mode;
29705 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29706 /* We are shifting bytes back, set the alignment accordingly. */
29707 if ((length & 0x3) == 0)
29708 set_mem_align (mem, BITS_PER_UNIT * 4);
29709 else if ((length & 0x1) == 0)
29710 set_mem_align (mem, BITS_PER_UNIT * 2);
29711 else
29712 set_mem_align (mem, BITS_PER_UNIT);
29713
29714 emit_insn (gen_movmisalignv16qi (mem, reg));
29715 return true;
29716 }
29717 /* Fall through for bytes leftover. */
29718 mode = V8QImode;
29719 nelt_mode = GET_MODE_NUNITS (mode);
29720 reg = gen_lowpart (V8QImode, reg);
29721 }
29722
29723 /* Handle 8 bytes in a vector. */
29724 for (; (i + nelt_mode <= length); i += nelt_mode)
29725 {
29726 addr = plus_constant (Pmode, dst, i);
29727 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29728 emit_move_insn (mem, reg);
29729 }
29730
29731 /* Handle single word leftover by shifting 4 bytes back. We can
29732 use aligned access for this case. */
29733 if (i + UNITS_PER_WORD == length)
29734 {
29735 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29736 offset += i - UNITS_PER_WORD;
29737 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29738 /* We are shifting 4 bytes back, set the alignment accordingly. */
29739 if (align > UNITS_PER_WORD)
29740 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29741
29742 emit_move_insn (mem, reg);
29743 }
29744 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29745 We have to use unaligned access for this case. */
29746 else if (i < length)
29747 {
29748 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29749 offset += length - nelt_mode;
29750 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29751 /* We are shifting bytes back, set the alignment accordingly. */
29752 if ((length & 1) == 0)
29753 set_mem_align (mem, BITS_PER_UNIT * 2);
29754 else
29755 set_mem_align (mem, BITS_PER_UNIT);
29756
29757 emit_insn (gen_movmisalignv8qi (mem, reg));
29758 }
29759
29760 return true;
29761 }
29762
29763 /* Set a block of memory using plain strh/strb instructions, only
29764 using instructions allowed by the alignment ALIGN. We fill the
29765 first LENGTH bytes of the memory area starting from DSTBASE
29766 with byte constant VALUE. ALIGN is the alignment requirement
29767 of memory. */
29768 static bool
29769 arm_block_set_unaligned_non_vect (rtx dstbase,
29770 unsigned HOST_WIDE_INT length,
29771 unsigned HOST_WIDE_INT value,
29772 unsigned HOST_WIDE_INT align)
29773 {
29774 unsigned int i;
29775 rtx dst, addr, mem;
29776 rtx val_exp, val_reg, reg;
29777 machine_mode mode;
29778 HOST_WIDE_INT v = value;
29779
29780 gcc_assert (align == 1 || align == 2);
29781
29782 if (align == 2)
29783 v |= (value << BITS_PER_UNIT);
29784
29785 v = sext_hwi (v, BITS_PER_WORD);
29786 val_exp = GEN_INT (v);
29787 /* Skip if it isn't profitable. */
29788 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29789 align, true, false))
29790 return false;
29791
29792 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29793 mode = (align == 2 ? HImode : QImode);
29794 val_reg = force_reg (SImode, val_exp);
29795 reg = gen_lowpart (mode, val_reg);
29796
29797 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29798 {
29799 addr = plus_constant (Pmode, dst, i);
29800 mem = adjust_automodify_address (dstbase, mode, addr, i);
29801 emit_move_insn (mem, reg);
29802 }
29803
29804 /* Handle single byte leftover. */
29805 if (i + 1 == length)
29806 {
29807 reg = gen_lowpart (QImode, val_reg);
29808 addr = plus_constant (Pmode, dst, i);
29809 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29810 emit_move_insn (mem, reg);
29811 i++;
29812 }
29813
29814 gcc_assert (i == length);
29815 return true;
29816 }
29817
29818 /* Set a block of memory using plain strd/str/strh/strb instructions,
29819 to permit unaligned stores on processors which support unaligned
29820 semantics for those instructions. We fill the first LENGTH bytes
29821 of the memory area starting from DSTBASE with byte constant VALUE.
29822 ALIGN is the alignment requirement of memory. */
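/* VALUE is first replicated across a word, e.g. 0xAB becomes 0xABABABAB
   (or 0xABABABABABABABAB when strd is used), so that each str or strd
   writes the pattern to 4 or 8 bytes at once.  */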
29823 static bool
29824 arm_block_set_aligned_non_vect (rtx dstbase,
29825 unsigned HOST_WIDE_INT length,
29826 unsigned HOST_WIDE_INT value,
29827 unsigned HOST_WIDE_INT align)
29828 {
29829 unsigned int i;
29830 rtx dst, addr, mem;
29831 rtx val_exp, val_reg, reg;
29832 unsigned HOST_WIDE_INT v;
29833 bool use_strd_p;
29834
29835 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29836 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29837
29838 v = (value | (value << 8) | (value << 16) | (value << 24));
29839 if (length < UNITS_PER_WORD)
29840 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29841
29842 if (use_strd_p)
29843 v |= (v << BITS_PER_WORD);
29844 else
29845 v = sext_hwi (v, BITS_PER_WORD);
29846
29847 val_exp = GEN_INT (v);
29848 /* Skip if it isn't profitable. */
29849 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29850 align, false, use_strd_p))
29851 {
29852 if (!use_strd_p)
29853 return false;
29854
29855 /* Try without strd. */
29856 v = (v >> BITS_PER_WORD);
29857 v = sext_hwi (v, BITS_PER_WORD);
29858 val_exp = GEN_INT (v);
29859 use_strd_p = false;
29860 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29861 align, false, use_strd_p))
29862 return false;
29863 }
29864
29865 i = 0;
29866 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29867 /* Handle double words using strd if possible. */
29868 if (use_strd_p)
29869 {
29870 val_reg = force_reg (DImode, val_exp);
29871 reg = val_reg;
29872 for (; (i + 8 <= length); i += 8)
29873 {
29874 addr = plus_constant (Pmode, dst, i);
29875 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29876 emit_move_insn (mem, reg);
29877 }
29878 }
29879 else
29880 val_reg = force_reg (SImode, val_exp);
29881
29882 /* Handle words. */
29883 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29884 for (; (i + 4 <= length); i += 4)
29885 {
29886 addr = plus_constant (Pmode, dst, i);
29887 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29888 if ((align & 3) == 0)
29889 emit_move_insn (mem, reg);
29890 else
29891 emit_insn (gen_unaligned_storesi (mem, reg));
29892 }
29893
29894 /* Merge last pair of STRH and STRB into a STR if possible. */
29895 if (unaligned_access && i > 0 && (i + 3) == length)
29896 {
29897 addr = plus_constant (Pmode, dst, i - 1);
29898 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29899 /* We are shifting one byte back, set the alignment accordingly. */
29900 if ((align & 1) == 0)
29901 set_mem_align (mem, BITS_PER_UNIT);
29902
29903 /* Most likely this is an unaligned access, and we can't tell at
29904 compilation time. */
29905 emit_insn (gen_unaligned_storesi (mem, reg));
29906 return true;
29907 }
29908
29909 /* Handle half word leftover. */
29910 if (i + 2 <= length)
29911 {
29912 reg = gen_lowpart (HImode, val_reg);
29913 addr = plus_constant (Pmode, dst, i);
29914 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29915 if ((align & 1) == 0)
29916 emit_move_insn (mem, reg);
29917 else
29918 emit_insn (gen_unaligned_storehi (mem, reg));
29919
29920 i += 2;
29921 }
29922
29923 /* Handle single byte leftover. */
29924 if (i + 1 == length)
29925 {
29926 reg = gen_lowpart (QImode, val_reg);
29927 addr = plus_constant (Pmode, dst, i);
29928 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29929 emit_move_insn (mem, reg);
29930 }
29931
29932 return true;
29933 }
29934
29935 /* Set a block of memory using vectorization instructions for both
29936 aligned and unaligned cases. We fill the first LENGTH bytes of
29937 the memory area starting from DSTBASE with byte constant VALUE.
29938 ALIGN is the alignment requirement of memory. */
29939 static bool
29940 arm_block_set_vect (rtx dstbase,
29941 unsigned HOST_WIDE_INT length,
29942 unsigned HOST_WIDE_INT value,
29943 unsigned HOST_WIDE_INT align)
29944 {
29945 /* Check whether we need to use unaligned store instruction. */
29946 if (((align & 3) != 0 || (length & 3) != 0)
29947 /* Check whether unaligned store instruction is available. */
29948 && (!unaligned_access || BYTES_BIG_ENDIAN))
29949 return false;
29950
29951 if ((align & 3) == 0)
29952 return arm_block_set_aligned_vect (dstbase, length, value, align);
29953 else
29954 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29955 }
29956
29957 /* Expand a block set (memset-style) operation. First we try to do that
29958 with vectorization instructions, then with ARM unaligned access and
29959 double-word stores if profitable. OPERANDS[0] is the destination,
29960 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29961 initialize the memory with, and OPERANDS[3] is the known alignment
29962 of the destination. */
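/* For instance, a memset of more than 64 bytes is never expanded inline
   here and falls back to the generic code, while a NEON-capable target
   with string_ops_prefer_neon set tries arm_block_set_vect first for
   blocks of at least 8 bytes.  */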
29963 bool
29964 arm_gen_setmem (rtx *operands)
29965 {
29966 rtx dstbase = operands[0];
29967 unsigned HOST_WIDE_INT length;
29968 unsigned HOST_WIDE_INT value;
29969 unsigned HOST_WIDE_INT align;
29970
29971 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29972 return false;
29973
29974 length = UINTVAL (operands[1]);
29975 if (length > 64)
29976 return false;
29977
29978 value = (UINTVAL (operands[2]) & 0xFF);
29979 align = UINTVAL (operands[3]);
29980 if (TARGET_NEON && length >= 8
29981 && current_tune->string_ops_prefer_neon
29982 && arm_block_set_vect (dstbase, length, value, align))
29983 return true;
29984
29985 if (!unaligned_access && (align & 3) != 0)
29986 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29987
29988 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29989 }
29990
29991
29992 static bool
29993 arm_macro_fusion_p (void)
29994 {
29995 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29996 }
29997
29998 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
29999 for MOVW / MOVT macro fusion. */
30000
30001 static bool
30002 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30003 {
30004 /* We are trying to fuse
30005 movw imm / movt imm
30006 instructions as a group that gets scheduled together. */
30007
30008 rtx set_dest = SET_DEST (curr_set);
30009
30010 if (GET_MODE (set_dest) != SImode)
30011 return false;
30012
30013 /* We are trying to match:
30014 prev (movw) == (set (reg r0) (const_int imm16))
30015 curr (movt) == (set (zero_extract (reg r0)
30016 (const_int 16)
30017 (const_int 16))
30018 (const_int imm16_1))
30019 or
30020 prev (movw) == (set (reg r1)
30021 (high (symbol_ref ("SYM"))))
30022 curr (movt) == (set (reg r0)
30023 (lo_sum (reg r1)
30024 (symbol_ref ("SYM")))) */
30025
30026 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30027 {
30028 if (CONST_INT_P (SET_SRC (curr_set))
30029 && CONST_INT_P (SET_SRC (prev_set))
30030 && REG_P (XEXP (set_dest, 0))
30031 && REG_P (SET_DEST (prev_set))
30032 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30033 return true;
30034
30035 }
30036 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30037 && REG_P (SET_DEST (curr_set))
30038 && REG_P (SET_DEST (prev_set))
30039 && GET_CODE (SET_SRC (prev_set)) == HIGH
30040 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30041 return true;
30042
30043 return false;
30044 }
30045
30046 static bool
30047 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30048 {
30049 rtx prev_set = single_set (prev);
30050 rtx curr_set = single_set (curr);
30051
30052 if (!prev_set
30053 || !curr_set)
30054 return false;
30055
30056 if (any_condjump_p (curr))
30057 return false;
30058
30059 if (!arm_macro_fusion_p ())
30060 return false;
30061
30062 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30063 && aarch_crypto_can_dual_issue (prev, curr))
30064 return true;
30065
30066 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30067 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30068 return true;
30069
30070 return false;
30071 }
30072
30073 /* Return true iff the instruction fusion described by OP is enabled. */
30074 bool
30075 arm_fusion_enabled_p (tune_params::fuse_ops op)
30076 {
30077 return current_tune->fusible_ops & op;
30078 }
30079
30080 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
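/* With ASan's usual 1/8th shadow mapping this places the shadow region at
   0x20000000, i.e. roughly shadow = (addr >> 3) + (1 << 29).  */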
30081
30082 static unsigned HOST_WIDE_INT
30083 arm_asan_shadow_offset (void)
30084 {
30085 return HOST_WIDE_INT_1U << 29;
30086 }
30087
30088
30089 /* This is a temporary fix for PR60655. Ideally we need
30090 to handle most of these cases in the generic part but
30091 currently we reject minus (..) (sym_ref). We try to
30092 ameliorate the case with minus (sym_ref1) (sym_ref2)
30093 where they are in the same section. */
30094
30095 static bool
30096 arm_const_not_ok_for_debug_p (rtx p)
30097 {
30098 tree decl_op0 = NULL;
30099 tree decl_op1 = NULL;
30100
30101 if (GET_CODE (p) == MINUS)
30102 {
30103 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30104 {
30105 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30106 if (decl_op1
30107 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30108 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30109 {
30110 if ((VAR_P (decl_op1)
30111 || TREE_CODE (decl_op1) == CONST_DECL)
30112 && (VAR_P (decl_op0)
30113 || TREE_CODE (decl_op0) == CONST_DECL))
30114 return (get_variable_section (decl_op1, false)
30115 != get_variable_section (decl_op0, false));
30116
30117 if (TREE_CODE (decl_op1) == LABEL_DECL
30118 && TREE_CODE (decl_op0) == LABEL_DECL)
30119 return (DECL_CONTEXT (decl_op1)
30120 != DECL_CONTEXT (decl_op0));
30121 }
30122
30123 return true;
30124 }
30125 }
30126
30127 return false;
30128 }
30129
30130 /* Return TRUE if X is a reference to a value in a constant pool. */
30131 extern bool
30132 arm_is_constant_pool_ref (rtx x)
30133 {
30134 return (MEM_P (x)
30135 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30136 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30137 }
30138
30139 /* Remember the last target of arm_set_current_function. */
30140 static GTY(()) tree arm_previous_fndecl;
30141
30142 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30143
30144 void
30145 save_restore_target_globals (tree new_tree)
30146 {
30147 /* If we have a previous state, use it. */
30148 if (TREE_TARGET_GLOBALS (new_tree))
30149 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30150 else if (new_tree == target_option_default_node)
30151 restore_target_globals (&default_target_globals);
30152 else
30153 {
30154 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30155 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30156 }
30157
30158 arm_option_params_internal ();
30159 }
30160
30161 /* Invalidate arm_previous_fndecl. */
30162
30163 void
30164 arm_reset_previous_fndecl (void)
30165 {
30166 arm_previous_fndecl = NULL_TREE;
30167 }
30168
30169 /* Establish appropriate back-end context for processing the function
30170 FNDECL. The argument might be NULL to indicate processing at top
30171 level, outside of any function scope. */
30172
30173 static void
30174 arm_set_current_function (tree fndecl)
30175 {
30176 if (!fndecl || fndecl == arm_previous_fndecl)
30177 return;
30178
30179 tree old_tree = (arm_previous_fndecl
30180 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30181 : NULL_TREE);
30182
30183 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30184
30185 /* If current function has no attributes but previous one did,
30186 use the default node. */
30187 if (! new_tree && old_tree)
30188 new_tree = target_option_default_node;
30189
30190 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30191 the default have been handled by save_restore_target_globals from
30192 arm_pragma_target_parse. */
30193 if (old_tree == new_tree)
30194 return;
30195
30196 arm_previous_fndecl = fndecl;
30197
30198 /* First set the target options. */
30199 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30200
30201 save_restore_target_globals (new_tree);
30202 }
30203
30204 /* Implement TARGET_OPTION_PRINT. */
30205
30206 static void
30207 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30208 {
30209 int flags = ptr->x_target_flags;
30210 const char *fpu_name;
30211
30212 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30213 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30214
30215 fprintf (file, "%*sselected arch %s\n", indent, "",
30216 TARGET_THUMB2_P (flags) ? "thumb2" :
30217 TARGET_THUMB_P (flags) ? "thumb1" :
30218 "arm");
30219
30220 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30221 }
30222
30223 /* Hook to determine if one function can safely inline another. */
30224
30225 static bool
30226 arm_can_inline_p (tree caller, tree callee)
30227 {
30228 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30229 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30230 bool can_inline = true;
30231
30232 struct cl_target_option *caller_opts
30233 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30234 : target_option_default_node);
30235
30236 struct cl_target_option *callee_opts
30237 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30238 : target_option_default_node);
30239
30240 if (callee_opts == caller_opts)
30241 return true;
30242
30243 /* Callee's ISA features should be a subset of the caller's. */
30244 struct arm_build_target caller_target;
30245 struct arm_build_target callee_target;
30246 caller_target.isa = sbitmap_alloc (isa_num_bits);
30247 callee_target.isa = sbitmap_alloc (isa_num_bits);
30248
30249 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30250 false);
30251 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30252 false);
30253 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30254 can_inline = false;
30255
30256 sbitmap_free (caller_target.isa);
30257 sbitmap_free (callee_target.isa);
30258
30259 /* OK to inline between different modes.
30260 Function with mode specific instructions, e.g using asm,
30261 must be explicitly protected with noinline. */
30262 return can_inline;
30263 }
30264
30265 /* Hook to fix function's alignment affected by target attribute. */
30266
30267 static void
30268 arm_relayout_function (tree fndecl)
30269 {
30270 if (DECL_USER_ALIGN (fndecl))
30271 return;
30272
30273 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30274
30275 if (!callee_tree)
30276 callee_tree = target_option_default_node;
30277
30278 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30279 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
30280 }
30281
30282 /* Inner function to process the attribute((target(...))), take an argument and
30283 set the current options from the argument. If we have a list, recursively
30284 go over the list. */
30285
30286 static bool
30287 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30288 {
30289 if (TREE_CODE (args) == TREE_LIST)
30290 {
30291 bool ret = true;
30292
30293 for (; args; args = TREE_CHAIN (args))
30294 if (TREE_VALUE (args)
30295 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30296 ret = false;
30297 return ret;
30298 }
30299
30300 else if (TREE_CODE (args) != STRING_CST)
30301 {
30302 error ("attribute %<target%> argument not a string");
30303 return false;
30304 }
30305
30306 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30307 char *q;
30308
30309 while ((q = strtok (argstr, ",")) != NULL)
30310 {
30311 while (ISSPACE (*q)) ++q;
30312
30313 argstr = NULL;
30314 if (!strncmp (q, "thumb", 5))
30315 opts->x_target_flags |= MASK_THUMB;
30316
30317 else if (!strncmp (q, "arm", 3))
30318 opts->x_target_flags &= ~MASK_THUMB;
30319
30320 else if (!strncmp (q, "fpu=", 4))
30321 {
30322 int fpu_index;
30323 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30324 &fpu_index, CL_TARGET))
30325 {
30326 error ("invalid fpu for attribute(target(\"%s\"))", q);
30327 return false;
30328 }
30329 if (fpu_index == TARGET_FPU_auto)
30330 {
30331 /* This doesn't really make sense until we support
30332 general dynamic selection of the architecture and all
30333 sub-features. */
30334 sorry ("auto fpu selection not currently permitted here");
30335 return false;
30336 }
30337 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30338 }
30339 else
30340 {
30341 error ("attribute(target(\"%s\")) is unknown", q);
30342 return false;
30343 }
30344 }
30345
30346 return true;
30347 }
30348
30349 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30350
30351 tree
30352 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30353 struct gcc_options *opts_set)
30354 {
30355 tree t;
30356
30357 if (!arm_valid_target_attribute_rec (args, opts))
30358 return NULL_TREE;
30359
30360 t = build_target_option_node (opts);
30361 arm_configure_build_target (&arm_active_target, TREE_TARGET_OPTION (t),
30362 opts_set, false);
30363 arm_option_check_internal (opts);
30364 /* Do any overrides, such as global options arch=xxx. */
30365 arm_option_override_internal (opts, opts_set);
30366
30367 /* Resynchronize the saved target options. */
30368 cl_target_option_save (TREE_TARGET_OPTION (t), opts);
30369
30370 return t;
30371 }
30372
30373 static void
30374 add_attribute (const char * mode, tree *attributes)
30375 {
30376 size_t len = strlen (mode);
30377 tree value = build_string (len, mode);
30378
30379 TREE_TYPE (value) = build_array_type (char_type_node,
30380 build_index_type (size_int (len)));
30381
30382 *attributes = tree_cons (get_identifier ("target"),
30383 build_tree_list (NULL_TREE, value),
30384 *attributes);
30385 }
30386
30387 /* For testing. Insert thumb or arm modes alternately on functions. */
30388
30389 static void
30390 arm_insert_attributes (tree fndecl, tree * attributes)
30391 {
30392 const char *mode;
30393
30394 if (! TARGET_FLIP_THUMB)
30395 return;
30396
30397 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
30398 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30399 return;
30400
30401 /* Nested definitions must inherit mode. */
30402 if (current_function_decl)
30403 {
30404 mode = TARGET_THUMB ? "thumb" : "arm";
30405 add_attribute (mode, attributes);
30406 return;
30407 }
30408
30409 /* If there is already a setting don't change it. */
30410 if (lookup_attribute ("target", *attributes) != NULL)
30411 return;
30412
30413 mode = thumb_flipper ? "thumb" : "arm";
30414 add_attribute (mode, attributes);
30415
30416 thumb_flipper = !thumb_flipper;
30417 }
30418
30419 /* Hook to validate attribute((target("string"))). */
30420
30421 static bool
30422 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30423 tree args, int ARG_UNUSED (flags))
30424 {
30425 bool ret = true;
30426 struct gcc_options func_options;
30427 tree cur_tree, new_optimize;
30428 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30429
30430 /* Get the optimization options of the current function. */
30431 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30432
30433 /* If the function changed the optimization levels as well as setting target
30434 options, start with the optimizations specified. */
30435 if (!func_optimize)
30436 func_optimize = optimization_default_node;
30437
30438 /* Init func_options. */
30439 memset (&func_options, 0, sizeof (func_options));
30440 init_options_struct (&func_options, NULL);
30441 lang_hooks.init_options_struct (&func_options);
30442
30443 /* Initialize func_options to the defaults. */
30444 cl_optimization_restore (&func_options,
30445 TREE_OPTIMIZATION (func_optimize));
30446
30447 cl_target_option_restore (&func_options,
30448 TREE_TARGET_OPTION (target_option_default_node));
30449
30450 /* Set func_options flags with new target mode. */
30451 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30452 &global_options_set);
30453
30454 if (cur_tree == NULL_TREE)
30455 ret = false;
30456
30457 new_optimize = build_optimization_node (&func_options);
30458
30459 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30460
30461 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30462
30463 finalize_options_struct (&func_options);
30464
30465 return ret;
30466 }
30467
30468 /* Match an ISA feature bitmap to a named FPU. We always use the
30469 first entry that exactly matches the feature set, so that we
30470 effectively canonicalize the FPU name for the assembler. */
30471 static const char*
30472 arm_identify_fpu_from_isa (sbitmap isa)
30473 {
30474 auto_sbitmap fpubits (isa_num_bits);
30475 auto_sbitmap cand_fpubits (isa_num_bits);
30476
30477 bitmap_and (fpubits, isa, isa_all_fpubits);
30478
30479 /* If there are no ISA feature bits relating to the FPU, we must be
30480 doing soft-float. */
30481 if (bitmap_empty_p (fpubits))
30482 return "softvfp";
30483
30484 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30485 {
30486 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30487 if (bitmap_equal_p (fpubits, cand_fpubits))
30488 return all_fpus[i].name;
30489 }
30490 /* We must find an entry, or things have gone wrong. */
30491 gcc_unreachable ();
30492 }
30493
30494 void
30495 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30496 {
30497
30498 fprintf (stream, "\t.syntax unified\n");
30499
30500 if (TARGET_THUMB)
30501 {
30502 if (is_called_in_ARM_mode (decl)
30503 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30504 && cfun->is_thunk))
30505 fprintf (stream, "\t.code 32\n");
30506 else if (TARGET_THUMB1)
30507 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30508 else
30509 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30510 }
30511 else
30512 fprintf (stream, "\t.arm\n");
30513
30514 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30515 (TARGET_SOFT_FLOAT
30516 ? "softvfp"
30517 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30518
30519 if (TARGET_POKE_FUNCTION_NAME)
30520 arm_poke_function_name (stream, (const char *) name);
30521 }
30522
30523 /* If MEM is in the form of [base+offset], extract the two parts of the
30524 address and store them in BASE and OFFSET; otherwise return FALSE
30525 after clearing BASE and OFFSET. */
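/* For example, (mem (plus (reg r1) (const_int 8))) yields BASE == r1 and
   OFFSET == 8, while (mem (reg r1)) yields BASE == r1 and OFFSET == 0.  */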
30526
30527 static bool
30528 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30529 {
30530 rtx addr;
30531
30532 gcc_assert (MEM_P (mem));
30533
30534 addr = XEXP (mem, 0);
30535
30536 /* Strip off const from addresses like (const (addr)). */
30537 if (GET_CODE (addr) == CONST)
30538 addr = XEXP (addr, 0);
30539
30540 if (GET_CODE (addr) == REG)
30541 {
30542 *base = addr;
30543 *offset = const0_rtx;
30544 return true;
30545 }
30546
30547 if (GET_CODE (addr) == PLUS
30548 && GET_CODE (XEXP (addr, 0)) == REG
30549 && CONST_INT_P (XEXP (addr, 1)))
30550 {
30551 *base = XEXP (addr, 0);
30552 *offset = XEXP (addr, 1);
30553 return true;
30554 }
30555
30556 *base = NULL_RTX;
30557 *offset = NULL_RTX;
30558
30559 return false;
30560 }
30561
30562 /* If INSN is a load or store whose address has the form [base+offset],
30563 extract the two parts and store them in BASE and OFFSET. IS_LOAD is
30564 set to TRUE if it is a load. Return TRUE if INSN is such an
30565 instruction, otherwise return FALSE. */
30566
30567 static bool
30568 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30569 {
30570 rtx x, dest, src;
30571
30572 gcc_assert (INSN_P (insn));
30573 x = PATTERN (insn);
30574 if (GET_CODE (x) != SET)
30575 return false;
30576
30577 src = SET_SRC (x);
30578 dest = SET_DEST (x);
30579 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30580 {
30581 *is_load = false;
30582 extract_base_offset_in_addr (dest, base, offset);
30583 }
30584 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30585 {
30586 *is_load = true;
30587 extract_base_offset_in_addr (src, base, offset);
30588 }
30589 else
30590 return false;
30591
30592 return (*base != NULL_RTX && *offset != NULL_RTX);
30593 }
30594
30595 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30596
30597 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30598 and PRI are only calculated for those. For other instructions, FUSION_PRI
30599 and PRI are simply set to MAX_PRI. In the future, other kinds of
30600 instruction fusion can be supported by returning different priorities.
30601
30602 It's important that irrelevant instructions get the largest FUSION_PRI. */
30603
30604 static void
30605 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30606 int *fusion_pri, int *pri)
30607 {
30608 int tmp, off_val;
30609 bool is_load;
30610 rtx base, offset;
30611
30612 gcc_assert (INSN_P (insn));
30613
30614 tmp = max_pri - 1;
30615 if (!fusion_load_store (insn, &base, &offset, &is_load))
30616 {
30617 *pri = tmp;
30618 *fusion_pri = tmp;
30619 return;
30620 }
30621
30622 /* Load goes first. */
30623 if (is_load)
30624 *fusion_pri = tmp - 1;
30625 else
30626 *fusion_pri = tmp - 2;
30627
30628 tmp /= 2;
30629
30630 /* INSN with smaller base register goes first. */
30631 tmp -= ((REGNO (base) & 0xff) << 20);
30632
30633 /* INSN with smaller offset goes first. */
30634 off_val = (int)(INTVAL (offset));
30635 if (off_val >= 0)
30636 tmp -= (off_val & 0xfffff);
30637 else
30638 tmp += ((- off_val) & 0xfffff);
30639
30640 *pri = tmp;
30641 return;
30642 }
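/* Illustration only: the priority arithmetic of
   arm_sched_fusion_priority with the rtx plumbing replaced by plain
   integers.  MERGEABLE stands in for "fusion_load_store succeeded";
   the register number and offset values in main are made up.  The
   ordering it encodes: loads before stores, then smaller base
   register, then smaller offset.  */

#include <stdio.h>

static void
sketch_fusion_priority (int max_pri, int mergeable, int is_load,
                        int base_regno, int off_val,
                        int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;

  if (!mergeable)                       /* not a simple [base+offset] access */
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  *fusion_pri = is_load ? tmp - 1 : tmp - 2;   /* load goes first */

  tmp /= 2;
  tmp -= (base_regno & 0xff) << 20;     /* smaller base register goes first */

  if (off_val >= 0)                     /* smaller offset goes first */
    tmp -= off_val & 0xfffff;
  else
    tmp += (-off_val) & 0xfffff;

  *pri = tmp;
}

int
main (void)
{
  int fusion_pri, pri;

  sketch_fusion_priority (1 << 30, 1, 1, 4, 8, &fusion_pri, &pri);
  printf ("fusion_pri=%d pri=%d\n", fusion_pri, pri);
  return 0;
}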
30643
30644
30645 /* Construct and return a PARALLEL RTX vector with elements numbering the
30646 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30647 the vector - from the perspective of the architecture. This does not
30648 line up with GCC's perspective on lane numbers, so we end up with
30649 different masks depending on our target endian-ness. The diagram
30650 below may help. We must draw the distinction when building masks
30651 which select one half of the vector. An instruction selecting
30652 architectural low-lanes for a big-endian target must be described using
30653 a mask selecting GCC high-lanes.
30654
30655                   Big-Endian            Little-Endian
30656
30657 GCC               0   1   2   3         3   2   1   0
30658               | x | x | x | x |       | x | x | x | x |
30659 Architecture      3   2   1   0         3   2   1   0
30660
30661 Low Mask:            { 2, 3 }              { 0, 1 }
30662 High Mask:           { 0, 1 }              { 2, 3 }
30663 */
30664
30665 rtx
30666 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30667 {
30668 int nunits = GET_MODE_NUNITS (mode);
30669 rtvec v = rtvec_alloc (nunits / 2);
30670 int high_base = nunits / 2;
30671 int low_base = 0;
30672 int base;
30673 rtx t1;
30674 int i;
30675
30676 if (BYTES_BIG_ENDIAN)
30677 base = high ? low_base : high_base;
30678 else
30679 base = high ? high_base : low_base;
30680
30681 for (i = 0; i < nunits / 2; i++)
30682 RTVEC_ELT (v, i) = GEN_INT (base + i);
30683
30684 t1 = gen_rtx_PARALLEL (mode, v);
30685 return t1;
30686 }
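/* Illustration only: the lane-numbering rule used by
   arm_simd_vect_par_cnst_half, reduced to plain integers.  For a
   vector of NUNITS lanes it prints the GCC lane indices selected for
   the architectural low and high halves on each endianness; with
   NUNITS == 4 the output reproduces the Low/High mask table in the
   comment above.  */

#include <stdio.h>

static void
print_half_mask (int nunits, int big_endian, int high)
{
  int half = nunits / 2;
  /* On big-endian targets the architectural halves are swapped
     relative to GCC's lane numbering, so "high" selects the low GCC
     indices and vice versa.  */
  int base = big_endian ? (high ? 0 : half) : (high ? half : 0);

  printf ("%s-endian %s mask: {", big_endian ? "big" : "little",
          high ? "high" : "low");
  for (int i = 0; i < half; i++)
    printf (" %d%s", base + i, i + 1 < half ? "," : " ");
  printf ("}\n");
}

int
main (void)
{
  for (int be = 0; be <= 1; be++)
    for (int high = 0; high <= 1; high++)
      print_half_mask (4, be, high);
  return 0;
}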
30687
30688 /* Check OP for validity as a PARALLEL RTX vector with elements
30689 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30690 from the perspective of the architecture. See the diagram above
30691 arm_simd_vect_par_cnst_half for more details. */
30692
30693 bool
30694 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30695 bool high)
30696 {
30697 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30698 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30699 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30700 int i = 0;
30701
30702 if (!VECTOR_MODE_P (mode))
30703 return false;
30704
30705 if (count_op != count_ideal)
30706 return false;
30707
30708 for (i = 0; i < count_ideal; i++)
30709 {
30710 rtx elt_op = XVECEXP (op, 0, i);
30711 rtx elt_ideal = XVECEXP (ideal, 0, i);
30712
30713 if (!CONST_INT_P (elt_op)
30714 || INTVAL (elt_ideal) != INTVAL (elt_op))
30715 return false;
30716 }
30717 return true;
30718 }
30719
30720 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30721 in Thumb1. */
30722 static bool
30723 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30724 const_tree)
30725 {
30726 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30727 if (vcall_offset && TARGET_THUMB1)
30728 return false;
30729
30730 /* Otherwise ok. */
30731 return true;
30732 }
30733
30734 /* Generate RTL for a conditional branch with rtx comparison CODE in
30735 mode CC_MODE. The destination of the unlikely conditional branch
30736 is LABEL_REF. */
30737
30738 void
30739 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30740 rtx label_ref)
30741 {
30742 rtx x;
30743 x = gen_rtx_fmt_ee (code, VOIDmode,
30744 gen_rtx_REG (cc_mode, CC_REGNUM),
30745 const0_rtx);
30746
30747 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30748 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30749 pc_rtx);
30750 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30751 }
30752
30753 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30754
30755 For pure-code sections there is no letter code for this attribute, so
30756 output all the section flags numerically when this is needed. */
30757
30758 static bool
30759 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30760 {
30761
30762 if (flags & SECTION_ARM_PURECODE)
30763 {
30764 *num = 0x20000000;  /* SHF_ARM_PURECODE */
30765
30766 if (!(flags & SECTION_DEBUG))
30767 *num |= 0x2;  /* SHF_ALLOC */
30768 if (flags & SECTION_EXCLUDE)
30769 *num |= 0x80000000;  /* SHF_EXCLUDE */
30770 if (flags & SECTION_WRITE)
30771 *num |= 0x1;  /* SHF_WRITE */
30772 if (flags & SECTION_CODE)
30773 *num |= 0x4;  /* SHF_EXECINSTR */
30774 if (flags & SECTION_MERGE)
30775 *num |= 0x10;  /* SHF_MERGE */
30776 if (flags & SECTION_STRINGS)
30777 *num |= 0x20;  /* SHF_STRINGS */
30778 if (flags & SECTION_TLS)
30779 *num |= 0x400;  /* SHF_TLS */
30780 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30781 *num |= 0x200;  /* SHF_GROUP */
30782
30783 return true;
30784 }
30785
30786 return false;
30787 }
30788
30789 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30790
30791 If pure-code is passed as an option, make sure all functions are in
30792 sections that have the SHF_ARM_PURECODE attribute. */
30793
30794 static section *
30795 arm_function_section (tree decl, enum node_frequency freq,
30796 bool startup, bool exit)
30797 {
30798 const char * section_name;
30799 section * sec;
30800
30801 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30802 return default_function_section (decl, freq, startup, exit);
30803
30804 if (!target_pure_code)
30805 return default_function_section (decl, freq, startup, exit);
30806
30807
30808 section_name = DECL_SECTION_NAME (decl);
30809
30810 /* If a function is not in a named section then it falls under the 'default'
30811 text section, also known as '.text'. We can preserve previous behavior as
30812 the default text section already has the SHF_ARM_PURECODE section
30813 attribute. */
30814 if (!section_name)
30815 {
30816 section *default_sec = default_function_section (decl, freq, startup,
30817 exit);
30818
30819 /* If default_sec is not null, then it must be a special section like for
30820 example .text.startup. We set the pure-code attribute and return the
30821 same section to preserve existing behavior. */
30822 if (default_sec)
30823 default_sec->common.flags |= SECTION_ARM_PURECODE;
30824 return default_sec;
30825 }
30826
30827 /* Otherwise check whether a section has already been created with
30828 'section_name'. */
30829 sec = get_named_section (decl, section_name, 0);
30830 if (!sec)
30831 /* If that is not the case, passing NULL as the section's name to
30832 'get_named_section' will create a section with the declaration's
30833 section name. */
30834 sec = get_named_section (decl, NULL, 0);
30835
30836 /* Set the SHF_ARM_PURECODE attribute. */
30837 sec->common.flags |= SECTION_ARM_PURECODE;
30838
30839 return sec;
30840 }
30841
30842 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
30843
30844 If DECL is a function declaration and pure-code is passed as an option,
30845 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
30846 section's name and RELOC indicates whether the declaration's initializer may
30847 contain runtime relocations. */
30848
30849 static unsigned int
30850 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30851 {
30852 unsigned int flags = default_section_type_flags (decl, name, reloc);
30853
30854 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
30855 flags |= SECTION_ARM_PURECODE;
30856
30857 return flags;
30858 }
30859
30860 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
30861
30862 static void
30863 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
30864 rtx op0, rtx op1,
30865 rtx *quot_p, rtx *rem_p)
30866 {
30867 if (mode == SImode)
30868 gcc_assert (!TARGET_IDIV);
30869
30870 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
30871 MODE_INT);
30872
30873 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
30874 libval_mode, 2,
30875 op0, GET_MODE (op0),
30876 op1, GET_MODE (op1));
30877
30878 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
30879 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
30880 GET_MODE_SIZE (mode));
30881
30882 gcc_assert (quotient);
30883 gcc_assert (remainder);
30884
30885 *quot_p = quotient;
30886 *rem_p = remainder;
30887 }
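
/* Illustration only: the contract arm_expand_divmod_libfunc relies on.
   The __aeabi_*divmod helpers hand back the quotient and the remainder
   together, and the caller above splits the pair out of the
   double-width return value via subregs.  Here the pair is modelled as
   a struct and the helper is implemented with ordinary C division;
   example_idivmod is a hypothetical stand-in, not the real library
   routine.  */

#include <stdio.h>

struct idivmod_result
{
  int quot;   /* quotient, truncated toward zero */
  int rem;    /* remainder with the sign of the numerator */
};

static struct idivmod_result
example_idivmod (int num, int den)
{
  struct idivmod_result r;

  r.quot = num / den;
  r.rem = num % den;
  return r;
}

int
main (void)
{
  struct idivmod_result r = example_idivmod (-7, 3);

  printf ("quot=%d rem=%d\n", r.quot, r.rem);   /* quot=-2 rem=-1 */
  return 0;
}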
30888
30889 #include "gt-arm.h"