/* Output routines for GCC for ARM.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static void arm_emit_multi_reg_pop (unsigned long);
static int vfp_emit_fstmd (int, int);
static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif

static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif

static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *, struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalign,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					  const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   tree vectype, int misalign,
				   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);

static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
					vec<const char *> &, vec<rtx> &,
					HARD_REG_SET &);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label
#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
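/* Spelled out: the anchor window [-4088, 4095] contains
   4088 + 1 + 4095 = 8184 byte offsets in total, and 8184 = 8 * 1023.  */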
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
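/* Background (informal): a Thumb-2 IT instruction can conditionalise up to
   four following instructions.  With -mrestrict-it (arm_restrict_it) the
   compiler limits itself to one instruction per IT block, matching the
   ARMv8 preference for short IT blocks.  */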
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
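/* In other words: with a value of 2, bit 1 of a function "pointer" can mark
   a custom descriptor, while bit 0 stays available for the ARM/Thumb ISA
   selection described above.  */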
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */
struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   Extensions.  */
int arm_arch8_1m_main = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* Nonzero if chip supports the Custom Datapath Extension.  */
int arm_arch_cde = 0;
int arm_arch_cde_coproc = 0;
const int arm_arch_cde_coproc_bits[] = {
  0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
};
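/* Reading (informal): arm_arch_cde_coproc is a bitmask; entry N of this
   table is the bit recording that coprocessor N (0-7) has CDE enabled.  */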
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
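/* Note the layout: the codes come in complementary pairs (eq/ne, cs/cc,
   mi/pl, vs/vc, hi/ls, ge/lt, gt/le), so inverting a condition is just an
   XOR of its table index with 1.  */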
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] = {
  FP_SYSREGS
};
#undef DEF_FP_SYSREG
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
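/* Informally: starting from the low registers r0-r7 (0xff), this drops the
   Thumb frame pointer and the PIC base register; SP (r13) and PC (r15) lie
   outside the low-register mask anyway, so their bits are already clear.  */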
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
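/* Informally: both expand to the { num_slots, l1_cache_size,
   l1_cache_line_size } triple embedded in the tune_params structures below;
   a value of -1 leaves the corresponding prefetch parameter at its
   default.  */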
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
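/* A minimal reading of this table: generic tuning models every scalar and
   vector operation at unit cost, and only a taken conditional branch as
   more expensive (3x), which biases the vectorizer gently toward
   straight-line vector code.  */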
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */

  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */
  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */

  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */

  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */

  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */

  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table v7m_extra_costs =
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */

  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */

  COSTS_N_INSNS (1)	/* alu.  */
/* Generic costs for addressing modes. */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int. */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT. */
    COSTS_N_INSNS (0),	/* AMO_NO_WB. */
    COSTS_N_INSNS (0)	/* AMO_WB. */
  },
  /* float. */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT. */
    COSTS_N_INSNS (0),	/* AMO_NO_WB. */
    COSTS_N_INSNS (0)	/* AMO_WB. */
  },
  /* vector. */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT. */
    COSTS_N_INSNS (0),	/* AMO_NO_WB. */
    COSTS_N_INSNS (0)	/* AMO_WB. */
  }
};
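/* Every entry above is zero, so by default no addressing mode is
   considered more expensive than any other; a core-specific table can
   be substituted in its tune_params entry when writeback or indexed
   forms really do cost extra. */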
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
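/* The tune_params initializers that follow all use the same field
   order as struct tune_params (declared in arm-protos.h); the
   trailing comments name the fields whose values are not
   self-describing. */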
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value. */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  3,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit. */
  3,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate. */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  3,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  3,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  3,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  32,					/* Memset max inline. */
  4,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value. */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  1,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs. */
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs. */
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors. */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning. */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit. */
  1,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply. */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs. */
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables. */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name. */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Supported TLS relocations. */
enum tls_reloc {
  TLS_GD32,
  TLS_GD32_FDPIC,
  TLS_LDM32,
  TLS_LDM32_FDPIC,
  TLS_LDO32,
  TLS_IE32,
  TLS_IE32_FDPIC,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant. */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid. */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE. */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit. */
    }

  return count;
}
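/* The loop above clears one set bit per iteration (Kernighan's
   method), so e.g. value = 0b1100 goes to 0b1000 and then to 0,
   giving a count of 2. */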
/* Return the number of bits set in BMAP. */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library functions. */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
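/* For example, arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add",
   "qq", 3) registers the libgcc helper "__gnu_addqq3"; the trailing
   digit is the usual optab operand count. */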
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
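/* For example, a fract conversion between two signed fract modes such
   as SQmode -> HQmode satisfies all three tests above and registers
   "__gnu_fractsqhq2", whereas a fixed-to-integer conversion like
   SQmode -> SImode gets the plain "__gnu_fractsqsi" name. */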
static GTY(()) rtx speculation_barrier_libfunc;

/* Record that we have no arithmetic or comparison libfuncs for
   machine mode MODE. */

static void
arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
{
  /* Arithmetic. */
  set_optab_libfunc (add_optab, mode, NULL);
  set_optab_libfunc (sdiv_optab, mode, NULL);
  set_optab_libfunc (smul_optab, mode, NULL);
  set_optab_libfunc (neg_optab, mode, NULL);
  set_optab_libfunc (sub_optab, mode, NULL);

  /* Comparisons. */
  set_optab_libfunc (eq_optab, mode, NULL);
  set_optab_libfunc (ne_optab, mode, NULL);
  set_optab_libfunc (lt_optab, mode, NULL);
  set_optab_libfunc (le_optab, mode, NULL);
  set_optab_libfunc (ge_optab, mode, NULL);
  set_optab_libfunc (gt_optab, mode, NULL);
  set_optab_libfunc (unord_optab, mode, NULL);
}
/* Set up library functions unique to ARM. */
static void
arm_init_libfuncs (void)
{
  machine_mode mode_iter;

  /* For Linux, we have access to kernel support for atomic operations. */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI format. */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0. */

  /* Double-precision floating-point arithmetic.  Table 2. */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3. */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4. */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5. */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6. */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7. */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8. */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9. */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1. */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     caller-saved registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines. */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster. */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead. */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode. */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions. */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic. */
      arm_block_arith_comp_libfuncs_for_mode (HFmode);
      break;

    default:
      break;
    }
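  /* With the IEEE format selected, a conversion such as
     "__fp16 h; float f = h;" therefore calls __gnu_h2f_ieee, while
     HFmode arithmetic and comparisons are blocked above so that they
     are carried out by widening to SFmode. */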
  /* For all possible libcalls in BFmode, record NULL. */
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
    {
      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
    }
  arm_block_arith_comp_libfuncs_for_mode (BFmode);
  /* Use names prefixed with __gnu_ for fixed-point helper functions. */

  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" }
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" },
      { E_QImode, "qi" },
      { E_HImode, "hi" },
      { E_SImode, "si" },
      { E_DImode, "di" },
      { E_SFmode, "sf" },
      { E_DFmode, "df" }
    };
  unsigned int i, j;
  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* On AAPCS systems, this is the "struct __va_list". */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list. */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>. */
  /* Create the type. */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name. */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field. */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout. */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list. */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list". */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified. */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible. */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices. */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling "
	     "for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when "
	     "compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on. */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode. */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
	 MOVT. */
      if (target_slow_flash_data
	  && (!TARGET_HAVE_MOVT || common_unsupported_modes))
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets. */
      if (target_pure_code && common_unsupported_modes)
	error ("%s only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW. */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}
/* Recompute the global settings depending on target attribute options. */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now. */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size. */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors. */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
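  /* Check: 248 + 1 + 4095 = 4344, which is 8 * 543. */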
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os. */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block. */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode. */
static GTY(()) bool thumb_flipper;

/* Options after initial target override. */
static GTY(()) tree init_optimize;
static void
arm_override_options_after_change_1 (struct gcc_options *opts,
				     struct gcc_options *opts_set)
{
  /* -falign-functions without argument: supply one. */
  if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}
/* Implement targetm.override_options_after_change. */

static void
arm_override_options_after_change (void)
{
  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_RESTORE. */
static void
arm_option_restore (struct gcc_options */* opts */,
		    struct gcc_options *opts_set, struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, opts_set, false);
}
/* Reset options between modes that the user has specified. */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts, opts_set);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 eg, -march=armv4. */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere. */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial values so that combinations of options
     like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT. */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors. */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6
				  && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2. */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns. */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra. */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually. */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;
/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical. */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;
  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning. */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }
  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits. */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu. */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal);

	  /* And if the target ISA lacks floating point, ignore any
	     extensions that depend on that. */
	  if (!bitmap_bit_p (target->isa, isa_bit_vfpv2))
	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with %<-march=%s%> switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning. */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting. */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks. */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture. */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path. */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them. */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed. */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu. */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory. */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features. */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user. */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match? */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match. */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}
	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
      /* Now we know the CPU, we can finally initialize the target
	 structure. */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }
  /* There may be implied bits which we still need to enable.  These are
     non-named features which are needed to complete other sets of features,
     but cannot be enabled from arm-cpus.in due to being shared between
     multiple fgroups.  Each entry in all_implied_fbits is of the form
     ante -> cons, meaning that if the feature "ante" is enabled, we should
     implicitly enable "cons". */
  const struct fbit_implication *impl = all_implied_fbits;
  while (impl->ante)
    {
      if (bitmap_bit_p (target->isa, impl->ante))
	bitmap_set_bit (target->isa, impl->cons);
      impl++;
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune. */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure. */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
  arm_option_reconfigure_globals ();
}
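/* For example, "-mcpu=cortex-m4 -march=armv7-a" leaves a non-empty
   isa_delta above, so with WARN_COMPATIBLE the conflicting switches
   are diagnosed, -march wins for code generation, and the -mcpu value
   is kept only as the default tuning choice. */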
/* Fix up any incompatible options that the user has specified. */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options, &global_options_set);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags. */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet. */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
3494 /* Set up some tuning parameters. */
3495 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3496 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3497 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3498 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3499 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3500 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3502 /* For arm2/3 there is no need to do any scheduling if we are doing
3503 software floating-point. */
3504 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3505 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3507 /* Override the default structure alignment for AAPCS ABI. */
3508 if (!global_options_set
.x_arm_structure_size_boundary
)
3510 if (TARGET_AAPCS_BASED
)
3511 arm_structure_size_boundary
= 8;
3515 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3517 if (arm_structure_size_boundary
!= 8
3518 && arm_structure_size_boundary
!= 32
3519 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3521 if (ARM_DOUBLEWORD_ALIGN
)
3523 "structure size boundary can only be set to 8, 32 or 64");
3525 warning (0, "structure size boundary can only be set to 8 or 32");
3526 arm_structure_size_boundary
3527 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3531 if (TARGET_VXWORKS_RTP
)
3533 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3534 arm_pic_data_is_text_relative
= 0;
3537 && !arm_pic_data_is_text_relative
3538 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3539 /* When text & data segments don't have a fixed displacement, the
3540 intended use is with a single, read only, pic base register.
3541 Unless the user explicitly requested not to do that, set
3543 target_flags
|= MASK_SINGLE_PIC_BASE
;
3545 /* If stack checking is disabled, we can use r10 as the PIC register,
3546 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3547 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3549 if (TARGET_VXWORKS_RTP
)
3550 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3551 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3554 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3555 arm_pic_register
= 9;
3557 /* If in FDPIC mode then force arm_pic_register to be r9. */
3560 arm_pic_register
= FDPIC_REGNUM
;
3562 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3565 if (arm_pic_register_string
!= NULL
)
3567 int pic_register
= decode_reg_name (arm_pic_register_string
);
3570 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3572 /* Prevent the user from choosing an obviously stupid PIC register. */
3573 else if (pic_register
< 0 || call_used_or_fixed_reg_p (pic_register
)
3574 || pic_register
== HARD_FRAME_POINTER_REGNUM
3575 || pic_register
== STACK_POINTER_REGNUM
3576 || pic_register
>= PC_REGNUM
3577 || (TARGET_VXWORKS_RTP
3578 && (unsigned int) pic_register
!= arm_pic_register
))
3579 error ("unable to use %qs for PIC register", arm_pic_register_string
);
3581 arm_pic_register
= pic_register
;
3585 target_word_relocations
= 1;
3587 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3588 if (fix_cm3_ldrd
== 2)
3590 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_cm3_ldrd
))
3596 /* Hot/Cold partitioning is not currently supported, since we can't
3597 handle literal pool placement in that case. */
3598 if (flag_reorder_blocks_and_partition
)
3600 inform (input_location
,
3601 "%<-freorder-blocks-and-partition%> not supported "
3602 "on this architecture");
3603 flag_reorder_blocks_and_partition
= 0;
3604 flag_reorder_blocks
= 1;
3608 /* Hoisting PIC address calculations more aggressively provides a small,
3609 but measurable, size reduction for PIC code. Therefore, we decrease
3610 the bar for unrestricted expression hoisting to the cost of PIC address
3611 calculation, which is 2 instructions. */
3612 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3613 param_gcse_unrestricted_cost
, 2);
3615 /* ARM EABI defaults to strict volatile bitfields. */
3616 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3617 && abi_version_at_least(2))
3618 flag_strict_volatile_bitfields
= 1;
3620 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3621 have deemed it beneficial (signified by setting
3622 prefetch.num_slots to 1 or more). */
3623 if (flag_prefetch_loop_arrays
< 0
3626 && current_tune
->prefetch
.num_slots
> 0)
3627 flag_prefetch_loop_arrays
= 1;
3629 /* Set up parameters to be used in prefetching algorithm. Do not
3630 override the defaults unless we are tuning for a core we have
3631 researched values for. */
3632 if (current_tune
->prefetch
.num_slots
> 0)
3633 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3634 param_simultaneous_prefetches
,
3635 current_tune
->prefetch
.num_slots
);
3636 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3637 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3638 param_l1_cache_line_size
,
3639 current_tune
->prefetch
.l1_cache_line_size
);
3640 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3641 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3642 param_l1_cache_size
,
3643 current_tune
->prefetch
.l1_cache_size
);
3645 /* Look through ready list and all of queue for instructions
3646 relevant for L2 auto-prefetcher. */
3647 int sched_autopref_queue_depth
;
3649 switch (current_tune
->sched_autopref
)
3651 case tune_params::SCHED_AUTOPREF_OFF
:
3652 sched_autopref_queue_depth
= -1;
3655 case tune_params::SCHED_AUTOPREF_RANK
:
3656 sched_autopref_queue_depth
= 0;
3659 case tune_params::SCHED_AUTOPREF_FULL
:
3660 sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3667 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3668 param_sched_autopref_queue_depth
,
3669 sched_autopref_queue_depth
);
3671 /* Currently, for slow flash data, we just disable literal pools. We also
3672 disable it for pure-code. */
3673 if (target_slow_flash_data
|| target_pure_code
)
3674 arm_disable_literal_pool
= true;
3676 /* Disable scheduling fusion by default if it's not armv7 processor
3677 or doesn't prefer ldrd/strd. */
3678 if (flag_schedule_fusion
== 2
3679 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3680 flag_schedule_fusion
= 0;
3682 /* Need to remember initial options before they are overriden. */
3683 init_optimize
= build_optimization_node (&global_options
,
3684 &global_options_set
);
3686 arm_options_perform_arch_sanity_checks ();
3687 arm_option_override_internal (&global_options
, &global_options_set
);
3688 arm_option_check_internal (&global_options
);
3689 arm_option_params_internal ();
3691 /* Create the default target_options structure. */
3692 target_option_default_node
= target_option_current_node
3693 = build_target_option_node (&global_options
, &global_options_set
);
3695 /* Register global variables with the garbage collector. */
3696 arm_add_gc_roots ();
3698 /* Init initial mode for testing. */
3699 thumb_flipper
= TARGET_THUMB
;
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa,
					  isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
				    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  arm_arch_cde = 0;
  arm_arch_cde_coproc = 0;
  int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
		    isa_bit_cdecp6, isa_bit_cdecp7};
  for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
    {
      int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
      if (cde_bit)
	{
	  arm_arch_cde |= cde_bit;
	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
	}
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
}
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
	    error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
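
/* Usage sketch (the attribute spelling is the documented GCC one; the
   handler name is hypothetical):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value maps the "IRQ" argument to ARM_FT_ISR via
   isr_attribute_args above.  */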
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc, #0]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
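
/* Illustrative example: for a trampoline placed at address 0x8000 on a
   Thumb target, the adjusted entry address is 0x8001; the set bottom bit
   tells BX/BLX to enter the code in Thumb state.  */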
/* Return 1 if REG needs to be saved.  For interrupt handlers, this
   includes call-clobbered registers too.  If this is a leaf function
   we can just examine the registers used by the RTL, but otherwise we
   have to assume that whatever function is called might clobber
   anything, and so we have to save all the call-clobbered registers
   too.  */
static inline bool reg_needs_saving_p (unsigned reg)
{
  unsigned long func_type = arm_current_func_type ();

  if (IS_INTERRUPT (func_type))
    if (df_regs_ever_live_p (reg)
	/* Save call-clobbered core registers.  */
	|| (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)
	    && reg < FIRST_VFP_REGNUM))
      return true;
    else
      return false;
  else
    if (!df_regs_ever_live_p (reg)
	|| call_used_or_fixed_reg_p (reg))
      return false;
    else
      return true;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry functions need to use bxns to return and thus
     need several instructions if anything needs to be popped.  Armv8.1-M
     Mainline also needs several instructions to save and restore FP
     context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_VFP_BASE)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
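
/* Worked examples (illustrative): in ARM mode a valid immediate is an
   8-bit value rotated right by an even amount, so 0x000000ff, 0x0000ff00
   and 0xff000000 are all representable, while 0x00000101 (nine significant
   bits) and 0x0001fe00 (0xff shifted by an odd amount) are not and must be
   synthesized from several insns.  */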
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
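
/* Worked example (illustrative): AND with 0xffffff00 fails
   const_ok_for_arm, but the complement 0x000000ff is valid, so the insn
   can be emitted as BIC rd, rn, #0xff; const_ok_for_op therefore accepts
   the constant for AND.  */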
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
   Avoid generating useless code when one of the bytes is zero.  */
void
thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
{
  bool mov_done_p = false;
  int i;

  /* Emit upper 3 bytes if needed.  */
  for (i = 0; i < 3; i++)
    {
      int byte = (op1 >> (8 * (3 - i))) & 0xff;

      if (byte)
	{
	  emit_set_insn (op0, mov_done_p
			 ? gen_rtx_PLUS (SImode, op0, GEN_INT (byte))
			 : GEN_INT (byte));
	  mov_done_p = true;
	}

      if (mov_done_p)
	emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
    }

  /* Emit lower byte if needed.  */
  if (!mov_done_p)
    emit_set_insn (op0, GEN_INT (op1 & 0xff));
  else if (op1 & 0xff)
    emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
}
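
/* Worked example (illustrative): for op1 == 0x01020304 the routine above
   emits
	movs	r0, #1
	lsls	r0, #8
	adds	r0, #2
	lsls	r0, #8
	adds	r0, #3
	lsls	r0, #8
	adds	r0, #4
   while a zero byte skips its add, and leading zero bytes also skip the
   shift.  */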
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));
	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
					 best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it
		 will leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode,
							    GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good, way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source,
								   sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
	                     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12
	  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
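
/* Worked example (illustrative): a SET of 0xffff0000 is not a valid
   single immediate, so the machinery above splits it into two rotated
   8-bit chunks:
	mov	rd, #0xff000000
	add	rd, rd, #0x00ff0000  */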
/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.  */
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}
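
/* Illustrative example: for a DImode (0x100000001 - x), both halves of
   the constant are 1, a valid immediate, so an RSBS/RSC pair is
   preferred over first loading the constant into a register pair.  */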
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
     ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
     either reversed or (for constant OP1) adjusted to GE/LT.
     Similarly for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || *code == GTU || *code == LEU)
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval)
		    {
		      /* Try to convert to GE/LT, unless that would be more
			 expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0))
		    {
		      /* Try to convert to GEU/LTU, unless that would
			 be more expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int) swap_condition ((enum rtx_code) *code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
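
/* Worked example (illustrative): for an SImode (GT x, 0xfff), #0xfff is
   not a valid immediate, so the comparison is rewritten as (GE x, 0x1000);
   #0x1000 is a rotated 8-bit immediate and loads in one insn.  */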
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}

/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}

static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers except in case of MVE, because in
	 MVE we will be using the hard-float ABI on a CPU which doesn't support
	 single-precision operations in hardware.  In MVE the following check
	 enables use of emulation for the single-precision arithmetic
	 operations.  */
      if (TARGET_HAVE_MVE)
	{
	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
	}
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
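
/* Note that the helpers registered above follow the run-time ABI: per the
   RTABI, functions such as __aeabi_dadd take and return their
   floating-point values in core registers even when the VFP PCS is
   otherwise in use, which is why their return locations must be
   special-cased here and in arm_libcall_value.  */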

static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}

/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */

static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      /* NOTE: This code is deprecated and has not been updated to handle
	 DECL_FIELD_ABI_IGNORED.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
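
/* For example, under AAPCS a struct wrapping a single int is returned
   in r0, since it is no larger than one word, whereas an 8-byte
   struct { int a, b; } fails all of the register tests above and is
   returned in memory.  */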

const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
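
/* The attribute is used on a function type or declaration, e.g.:

     double f (double) __attribute__((pcs("aapcs")));

   which forces the base (soft-float marshalling) variant for that
   function even when the default PCS is aapcs-vfp.  */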

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}

/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */

static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_node *local_info_node
	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
	  if (local_info_node && local_info_node->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}

static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}

/* Bitmasks that indicate whether earlier versions of GCC would have
   taken a different path through the ABI logic.  This should result in
   a -Wpsabi warning if the earlier path led to a different ABI decision.

   WARN_PSABI_EMPTY_CXX17_BASE
      Indicates that the type includes an artificial empty C++17 base field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  See PR94711 for details.

   WARN_PSABI_NO_UNIQUE_ADDRESS
      Indicates that the type includes an empty [[no_unique_address]] field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  */
const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
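
/* A type that trips WARN_PSABI_EMPTY_CXX17_BASE would be, for instance,

     struct empty {};
     struct hfa : empty { double x, y; };

   where the artificial base field made GCC before 10.1 reject HFA
   treatment in C++17 mode, diverging from C++14.  */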

/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.

   The WARN_PSABI_FLAGS argument allows the caller to check whether this
   function has changed its behavior relative to earlier versions of GCC.
   Normally the argument should be nonnull and point to a zero-initialized
   variable.  The function then records whether the ABI decision might
   be affected by a known fix to the ABI logic, setting the associated
   WARN_PSABI_* bits if so.

   When the argument is instead a null pointer, the function tries to
   simulate the behavior of GCC before all such ABI fixes were made.
   This is useful to check whether the function returns something
   different after the ABI fixes.  */

static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
			 unsigned int *warn_psabi_flags)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
					 warn_psabi_flags);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    if (DECL_FIELD_ABI_IGNORED (field))
	      {
		/* See whether this is something that earlier versions of
		   GCC failed to ignore.  */
		unsigned int flag;
		if (lookup_attribute ("no_unique_address",
				      DECL_ATTRIBUTES (field)))
		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
		else if (cxx17_empty_base_field_p (field))
		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
		else
		  /* No compatibility problem.  */
		  continue;

		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= flag;
		    continue;
		  }
	      }

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
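
/* For instance, struct { float x, y, z; } walks to three SFmode
   elements with no padding, so this returns 3 and the struct
   qualifies as a homogeneous floating-point aggregate (HFA).  */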

/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}

/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      unsigned int warn_psabi_flags = 0;
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
					      &warn_psabi_flags);
      if (ag_count > 0 && ag_count <= 4)
	{
	  static unsigned last_reported_type_uid;
	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
	  int alt;
	  if (warn_psabi
	      && warn_psabi_flags
	      && uid != last_reported_type_uid
	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
		  != ag_count))
	    {
	      const char *url
		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
	      gcc_assert (alt == -1);
	      last_reported_type_uid = uid;
	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
		 qualification.  */
	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
		inform (input_location, "parameter passing for argument of "
			"type %qT with %<[[no_unique_address]]%> members "
			"changed %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url);
	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
		inform (input_location, "parameter passing for argument of "
			"type %qT when C++17 is enabled changed to match "
			"C++14 %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url);
	    }
	  *count = ag_count;
	}
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with -mgeneral-regs-only",
	   type);

  return true;
}

static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}

/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
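
/* For a single DFmode argument, for example, SHIFT is 2 and MASK is
   0x3, so the loop above searches for two adjacent free
   single-precision slots (s0/s1, s2/s3, ...), i.e. a free D register.  */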

/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!(TARGET_NEON || TARGET_HAVE_MVE))
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}

#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
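
/* AAPCS_CP(vfp) expands to the six aapcs_vfp_* hooks defined above, so
   VFP occupies the only co-processor slot; further co-processors would
   contribute additional AAPCS_CP entries to this table.  */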

static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}

static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}

/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
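
/* Worked example of the rules above for f (int, double) with the base
   PCS: the int takes r0 (C4), then C3 rounds the NCRN up from 1 to 2
   for the doubleword-aligned double, which then occupies r2/r3 (C4).  */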

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}

/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.

     Note that we explicitly consider zero-sized fields here, even though
     they don't map to AAPCS machine types.  For example, in:

       struct __attribute__((aligned(8))) empty {};

       struct s {
	 [[no_unique_address]] empty e;
	 int x;
       };

     "s" contains only one Fundamental Data Type (the int field)
     but gains 8-byte alignment and size thanks to "e".  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 2;

  if (ret2)
    return 2;

  return ret;
}

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);

      pcum->can_split = false;
      return NULL_RTX;
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}

static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}

static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
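
/* For example, for f (int, int, int, double) with the base PCS, the
   double starts at r3 with only one core register left, so this
   returns (4 - 3) * UNITS_PER_WORD = 4: four bytes travel in r3 and
   the remainder goes on the stack.  */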

/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}

/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}
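
/* A type has a non-constant TYPE_SIZE when, for example, it is a
   variable-length array or a struct containing one, so such objects
   are passed by an invisible reference rather than copied.  */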

/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}

/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      else if (TARGET_VFP_BASE)
	{
	  warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
		   name);
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}

/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif

/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}

/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}

/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  tree attrs1 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type1));
  tree attrs2 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type2));
  if (bool (attrs1) != bool (attrs2))
    return 0;
  if (attrs1 && !attribute_value_equal (attrs1, attrs2))
    return 0;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}

/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}

/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}

/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__((long call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
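
/* Case "a" above is written in the source as, e.g.:

     extern void far_handler (void) __attribute__((long_call));

   which forces calls to far_handler through a full 32-bit address even
   without -mlong-calls.  */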

/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  if (TARGET_FDPIC)
    {
      /* In FDPIC, never tailcall something for which we have no decl:
	 the target function could be in a different module, requiring
	 a different FDPIC register value.  */
      if (decl == NULL)
	return false;
    }

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    {
	      function_arg_info arg (type, /*named=*/true);
	      arm_function_arg_advance (cum_v, arg);
	    }
	}

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
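/* For instance, a bare (symbol_ref "foo"), or
   (const (plus (symbol_ref "foo") (const_int 4))), is rejected above and
   must instead be rewritten through the GOT by legitimize_pic_address
   below; plain integers and label differences remain legitimate.  */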
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already
   done so.  COMPUTE_NOW decides whether and where to set the PIC register.
   If true, the PIC register is reloaded in the current position of the
   instruction stream regardless of whether it was loaded before.  Otherwise,
   it is only loaded if not already done so (crtl->uses_pic_offset_table is
   null).  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}
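/* Thus the two supported call shapes are, roughly:
     require_pic_register (NULL_RTX, false) -- lazily mark the function
       as using the PIC register, loading it at most once; and
     require_pic_register (reg, true) -- reload the PIC base into REG at
       the current point in the instruction stream.  */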
/* Generate insns to calculate the address of ORIG in pic mode.  */
static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by the
   register allocator.  COMPUTE_NOW forces the PIC register to be loaded at
   the current location in the instruction stream, regardless of whether it
   was loaded previously.  Note that nonnull PIC_REG is only supported iff
   COMPUTE_NOW is true and null PIC_REG is only supported iff COMPUTE_NOW
   is false.

   Returns the register REG into which the PIC load is performed.  */

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
			bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
	       && (!SYMBOL_REF_FUNCTION_P (orig)
		   || arm_fdpic_local_funcdesc_p (orig))))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register (pic_reg, compute_now);

	  if (pic_reg == NULL_RTX)
	    pic_reg = cfun->machine->pic_reg;

	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
				     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg, pic_reg,
				       compute_now);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
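/* For example, with -Os on a Thumb-1 target a high register such as r8
   is reported as callee-saved by this macro even though it is in the
   call-used set, precisely so that the compiler avoids allocating it.  */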
/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Find a spare register to use during the prolog of a function.  */
static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
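/* E.g. if r3 is dead at the end of the prologue it is returned first;
   otherwise a low callee-saved register that the prologue is about to
   push (and may therefore freely clobber) is used instead.  */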
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */
void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
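/* For the TARGET_32BIT path above, the emitted RTL typically assembles
   to something like (ARM state, where reading the pc yields '. + 8'):

	ldr	rPIC, .LCn	@ .LCn: .word _GLOBAL_OFFSET_TABLE_-(.LPICm+8)
   .LPICm:
	add	rPIC, pc, rPIC

   which is exactly the 'dot + 8' adjustment applied when building
   PIC_RTX.  */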
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (GET_CODE (orig) == LABEL_REF)
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = (TREE_CODE (decl) == VAR_DECL)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = TREE_CODE (decl) == VAR_DECL
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && !CONST_INT_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
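/* Examples of SImode addresses accepted above (ARM state):
     (reg rN)					ldr r0, [rN]
     (plus (reg rN) (const_int 4))		ldr r0, [rN, #4]
     (plus (reg rN) (mult (reg rM) (const_int 4)))
						ldr r0, [rN, rM, lsl #2]
     (post_inc (reg rN))			ldr r0, [rN], #4  */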
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;
  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (TARGET_HAVE_MVE
      && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
    return mve_vector_mem_operand (mode, x, strict_p);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode
	   || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
	   || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
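/* The factors 1, 2, 4 and 8 correspond to the Thumb-2 scaled register
   offset forms [rN, rM], [rN, rM, lsl #1], [rN, rM, lsl #2] and
   [rN, rM, lsl #3] respectively.  */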
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_VFP_BASE
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
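/* So a plain core-register SImode access in Thumb-2 state accepts
   immediate offsets in [-255, 4095]: e.g. both ldr r0, [r1, #4095] and
   ldr r0, [r1, #-255] are encodable by the 32-bit LDR immediate
   forms.  */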
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
	   && !arm_disable_literal_pool)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && !arm_disable_literal_pool
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
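/* The forms accepted above mirror the 16-bit Thumb encodings:
   [rB, rI] register+register, [rB, #imm5] with the immediate scaled by
   the access size, and [sp, #imm8*4] for word accesses off the stack
   pointer.  */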
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
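/* These are the 5-bit immediate ranges scaled by the access size:
   bytes 0-31, halfwords 0-62 (even), words 0-124 (multiples of 4).
   E.g. ldrb r0, [r1, #31] is encodable but ldrb r0, [r1, #32] is
   not.  */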
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  /* Restore the FDPIC register after the call.  */
	  emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
							  initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
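/* With TARGET_HARD_TP the thread pointer comes straight from the CP15
   thread-ID register (roughly: mrc p15, 0, rD, c13, c0, 3); the soft
   variant instead calls a helper such as __aeabi_read_tp, which returns
   the value in r0 -- hence the copy out of r0 above.  */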
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme.  */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode,
				   gen_rtvec (2, x,
					      GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
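/* In outline, the four models above lower to (a sketch only, eliding
   the exact relocations involved):
     global-dynamic:  dest = __tls_get_addr (GOT-relative descriptor);
     local-dynamic:   base = __tls_get_addr (module descriptor),
		      dest = base + dtpoff (x);
     initial-exec:    dest = tp + gottpoff (x) loaded from the GOT;
     local-exec:      dest = tp + tpoff (x), a link-time constant.  */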
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (TARGET_THUMB1)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
	 only use an 8-bit index.  So let's use a 12-bit index for
	 SImode only and hope that arm_gen_constant will enable LDRB
	 to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the
	     base with more bits set and use a negative index instead.
	     Don't do this for Thumb as negative offsets are much more
	     limited.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;
  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return arm_disable_literal_pool
	    ? COSTS_N_INSNS (8)
	    : COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4 bytes long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return arm_disable_literal_pool
	    ? COSTS_N_INSNS (8)
	    : COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case E_QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
      return true;

    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}
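/* Illustrative sketch, not part of the original sources: for a DFmode
   load through a post-incremented base register, the switch above
   classifies the address as AMO_WB and the access itself is charged at
   extra_cost->ldst.loadd (an 8-byte floating-point load).  */
static void ATTRIBUTE_UNUSED
arm_mem_costs_example (const struct cpu_cost_table *extra_cost, bool speed_p)
{
  rtx base = gen_rtx_REG (Pmode, 0);
  rtx mem = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (Pmode, base));
  int cost;

  arm_mem_costs (mem, extra_cost, &cost, speed_p);
}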
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
				   && GET_MODE_SIZE (mode) > 4)
				  ? 2 : 1));
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);
    case PARALLEL:
      {
	/* Calculations of LDM costs are complex.  We assume an initial cost
	   (ldm_1st) which will load the number of registers mentioned in
	   ldm_regs_per_insn_1st registers; then each additional
	   ldm_regs_per_insn_subsequent registers cost one more insn.  The
	   formula for N regs is thus:

	     ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				       + ldm_regs_per_insn_subsequent - 1)
				      / ldm_regs_per_insn_subsequent).

	   Additional costs may also be added for addressing.  A similar
	   formula is used for STM.  */

	bool is_ldm = load_multiple_operation (x, SImode);
	bool is_stm = store_multiple_operation (x, SImode);

	if (is_ldm || is_stm)
	  {
	    if (speed_p)
	      {
		HOST_WIDE_INT nregs = XVECLEN (x, 0);
		HOST_WIDE_INT regs_per_insn_1st
		  = is_ldm ? extra_cost->ldst.ldm_regs_per_insn_1st
			   : extra_cost->ldst.stm_regs_per_insn_1st;
		HOST_WIDE_INT regs_per_insn_sub
		  = is_ldm ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			   : extra_cost->ldst.stm_regs_per_insn_subsequent;

		*cost += regs_per_insn_1st
			 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					   + regs_per_insn_sub - 1)
					  / regs_per_insn_sub);
		return true;
	      }
	  }
	return false;
      }
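      /* Worked example (added for illustration; the tuning numbers are
	 assumptions, not taken from any particular cost table): with
	 ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
	 an 8-register LDM costs
	   ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
	   == ldm_1st + COSTS_N_INSNS (3).  */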
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through.  */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  /* Slightly disparage left shift by 1 so we prefer adddi3.  */
	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
	    *cost += 1;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;
    case BSWAP:
      if (mode == SImode)
	{
	  if (arm_arch6)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	  else
	    {
	      /* No rev instruction available.  Look at arm_legacy_rev
		 and thumb_legacy_rev for the form of RTL used then.  */
	      if (TARGET_THUMB)
		{
		  *cost += COSTS_N_INSNS (9);

		  if (speed_p)
		    {
		      *cost += 6 * extra_cost->alu.shift;
		      *cost += 3 * extra_cost->alu.logical;
		    }
		}
	      else
		{
		  *cost += COSTS_N_INSNS (4);

		  if (speed_p)
		    {
		      *cost += 2 * extra_cost->alu.shift;
		      *cost += extra_cost->alu.arith_shift;
		      *cost += 2 * extra_cost->alu.logical;
		    }
		}
	      return true;
	    }
	}

      *cost = LIBCALL_COST (1);
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Factor out any borrow operation.  There's more than one way
	     of expressing this; try to recognize them all.  */
	  if (GET_CODE (op0) == MINUS)
	    {
	      if (arm_borrow_operation (op1, SImode))
		{
		  op1 = XEXP (op0, 1);
		  op0 = XEXP (op0, 0);
		}
	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
		op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_borrow_operation (XEXP (op1, 0), SImode))
	    op1 = XEXP (op1, 0);
	  else if (GET_CODE (op0) == NEG
		   && arm_borrow_operation (op1, SImode))
	    {
	      /* Negate with carry-in.  For Thumb2 this is done with
		 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
		 RSC instruction that exists in Arm mode.  */
	      if (speed_p)
		*cost += (TARGET_THUMB2
			  ? extra_cost->alu.arith_shift
			  : extra_cost->alu.arith);
	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
	      return true;
	    }
	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
	     Note we do mean ~borrow here.  */
	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
	    {
	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
	      return true;
	    }

	  shift_op = shifter_op_p (op0, &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (op1, &shift_by_reg);
	      non_shift_op = op0;
	    }
	  else
	    non_shift_op = op1;

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (op0))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (op0), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse as we don't want to cost any borrow that
	     we've stripped.  */
	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	  return true;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Handle a side effect of adding in the carry to an addition.  */
	  if (GET_CODE (op0) == PLUS
	      && arm_carry_operation (op1, mode))
	    {
	      op1 = XEXP (op0, 1);
	      op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_carry_operation (op0, mode))
	    {
	      op0 = XEXP (op1, 0);
	      op1 = XEXP (op1, 1);
	    }
	  else if (GET_CODE (op0) == PLUS)
	    {
	      op0 = strip_carry_operation (op0);
	      if (swap_commutative_operands_p (op0, op1))
		std::swap (op0, op1);
	    }

	  if (arm_carry_operation (op0, mode))
	    {
	      /* Adding the carry to a register is a canonicalization of
		 adding 0 to the register plus the carry.  */
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (op0) == MULT)
	    {
	      rtx mul_op = op0;

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (op1))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (op1), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse here because we want to test the operands
	     without any carry operation.  */
	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	  return true;
	}

      if (mode == DImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }
	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
    case NE:
    case EQ:
    case GE:
    case GT:
    case LE:
    case LT:
    case GEU:
    case GTU:
    case LEU:
    case LTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		  /* RSBS T1, Rn, #0
		     ADC  Rd, Rn, T1.  */

		case NE:
		  /* SUBS T1, Rn, #1
		     SBC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  /* ASR  Rd, Rn, #31
		     ADD  Rd, Rd, #1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

    const_int_cost:
      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	  /* Extra costs?  */
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	  /* Extra costs?  */
	}

      return true;
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;
    case CONST_VECTOR:
      /* Fixme.  */
      if (((TARGET_NEON && TARGET_HARD_FLOAT
	    && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
	   || TARGET_HAVE_MVE)
	  && simd_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_VFP5)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case CALL:
      return true;

    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }

    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  /* Don't cost a simple reg-reg move at a full insn cost: such moves
     will likely disappear during register allocation.  */
  if (!reload_completed
      && GET_CODE (PATTERN (insn)) == SET
      && REG_P (SET_DEST (PATTERN (insn)))
      && REG_P (SET_SRC (PATTERN (insn))))
    return 2;
  cost = pattern_cost (PATTERN (insn), speed);
  /* If the cost is zero, then it's likely a complex insn.  We don't want the
     cost of these to be less than something we know about.  */
  return cost ? cost : COSTS_N_INSNS (2);
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
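/* Illustrative sketch, not part of the original sources: under the
   weighting above, an auto-increment address is free, a reg+offset
   address costs 2 and a bare symbol reference costs 10, so
   auto-increment forms win whenever they are otherwise equivalent.  */
static void ATTRIBUTE_UNUSED
arm_address_cost_example (void)
{
  rtx base = gen_rtx_REG (SImode, 0);

  int autoinc ATTRIBUTE_UNUSED
    = arm_arm_address_cost (gen_rtx_POST_INC (SImode, base));		/* 0 */
  int offset ATTRIBUTE_UNUSED
    = arm_arm_address_cost (gen_rtx_PLUS (SImode, base, GEN_INT (8)));	/* 2 */
}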
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int * cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 0;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
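/* Illustrative sketch, not part of the original sources: on TARGET_32BIT
   every memory move is a flat 10; on Thumb-1 an SImode spill through
   LO_REGS costs 2 * 4 * 1 == 8, while the same spill through any other
   class doubles to 16.  */
static int ATTRIBUTE_UNUSED
thumb1_spill_cost_example (machine_mode mode, reg_class_t rclass)
{
  return (2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2);
}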
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
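/* Illustrative sketch, not part of the original sources: the only
   non-table entry above is vec_construct, which grows with the vector
   width; a 4-element vector costs 4 / 2 + 1 == 3, an 8-element one
   8 / 2 + 1 == 5.  */
static int ATTRIBUTE_UNUSED
vec_construct_cost_example (unsigned elements)
{
  return elements / 2 + 1;
}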
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
		   enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, tree vectype,
		   int misalign, enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info
	  && stmt_in_inner_loop_p (vinfo, stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_ADR:
    case TYPE_BFM:
    case TYPE_REV:
    case TYPE_MVN_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD_4:
    case TYPE_STORE_4:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FMOV:
    case TYPE_F_CVT:
    case TYPE_FCMPS:
    case TYPE_FCMPD:
    case TYPE_FCONSTS:
    case TYPE_FCONSTD:
    case TYPE_FMULS:
    case TYPE_FMACS:
    case TYPE_FMULD:
    case TYPE_FMACD:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_F_MRC:
    case TYPE_F_MRRC:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}

/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_EXTEND:
    case TYPE_MVN_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
    case TYPE_BRANCH:
    case TYPE_CALL:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock,
	     *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
}
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
12438 arm_max_conditional_execute (void)
12440 return max_insns_skipped
;
12444 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12447 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12449 return (optimize
> 0) ? 2 : 0;
12453 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12455 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}

/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using this
   formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
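
/* As a worked example of the encoding above: 1.0 = +16 * 2^-4, so s = 0,
   n = 16 and r = 4.  That maps to A = 0, BCD = (4 XOR 3) = 7 (binary 111)
   and EFGH = 16 - 16 = 0, giving the 8-bit immediate 0111 0000 = 0x70,
   which is the fconst/vmov encoding of 1.0.  */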

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}

/* Recognize immediates which can be used in various Neon and MVE instructions.
   Legal immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
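
/* For example, a vector of i32 elements all equal to 0x0000ff00 matches
   variant 1 above (abcdefgh = 11111111 in byte 1), so it can be loaded
   with a single "vmov.i32 qd, #0x0000ff00".  */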

static int
simd_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Only support 128-bit vectors for MVE.  */
  if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
    return -1;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
}

/* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
   implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
   (or zero for float elements), and a modified constant (whatever should be
   output for a VMOV) in *MODCONST.  "neon_immediate_valid_for_move" function is
   modified to "simd_immediate_valid_for_move" as this function will be used
   both by neon and mve.  */
int
simd_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See simd_valid_immediate for description of INVERSE.  */
int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT determines whether a left or a right shift is
   wanted, because they have different limitations.  */
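
/* For instance, with 16-bit elements the accepted immediates are 0-15 for
   VSHL but 1-16 for VSHR, which is why the two directions must be
   distinguished.  */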

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}

/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}

/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}

/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
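
/* E.g. the V8QI constant {5, 5, 5, 5, 5, 5, 5, 5} can be loaded as
     mov     r0, #5
     vdup.8  d0, r0
   instead of going through the constant pool.  */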

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}

/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   an error if it doesn't, using DESC to describe the kind of operand.  EXP
   indicates the source location, which includes the inlining history for
   intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}

/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB level is 2 if full writeback address modes are allowed, 1
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed and 0 if no writeback at all is supported.  */

int
arm_coproc_mem_operand_wb (rtx op, int wb_level)
{
  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need full writeback to accept
     PRE_INC and POST_DEC, and at least restricted writeback to accept
     POST_INC and PRE_DEC.  */
  if (wb_level >= 1
      && (GET_CODE (ind) == POST_INC
	  || GET_CODE (ind) == PRE_DEC
	  || (wb_level >= 2
	      && (GET_CODE (ind) == PRE_INC
		  || GET_CODE (ind) == POST_DEC))))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb_level >= 2
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).

     The encoded immediate for 16-bit modes is multiplied by 2,
     while the encoded immediate for 32-bit and 64-bit modes is
     multiplied by 4.  */
  int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
      && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
    return TRUE;

  return FALSE;
}

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int arm_coproc_mem_operand (rtx op, bool wb)
{
  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
}

/* Return TRUE if OP is a valid coprocessor memory address pattern in a
   context in which no writeback address modes are allowed.  */

int
arm_coproc_mem_operand_no_writeback (rtx op)
{
  return arm_coproc_mem_operand_wb (op, 0);
}

/* This function returns TRUE on matching mode and op.
   1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
   2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13).  */
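
/* E.g. for V4HImode a plain [Rn] address is only valid when Rn is one of
   r0-r7, while for the 128-bit vector modes any core register other than
   SP and PC may serve as the base.  */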

int
mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
{
  enum rtx_code code;
  int val, reg_no;

  /* Match: (mem (reg)).  */
  if (REG_P (op))
    {
      int reg_no = REGNO (op);
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  code = GET_CODE (op);

  if (code == POST_INC || code == PRE_DEC
      || code == PRE_INC || code == POST_DEC)
    {
      reg_no = REGNO (XEXP (op, 0));
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && GET_CODE (XEXP (op, 1)) == PLUS && REG_P (XEXP (XEXP (op, 1), 1)))
    {
      reg_no = REGNO (XEXP (op, 0));
      val = INTVAL (XEXP ( XEXP (op, 1), 1));
      switch (mode)
	{
	case E_V16QImode:
	  if (abs (val) <= 127)
	    return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
		    || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
	  return FALSE;
	case E_V8HImode:
	case E_V8HFmode:
	  if (abs (val) <= 255)
	    return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
		    || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
	  return FALSE;
	case E_V8QImode:
	case E_V4QImode:
	  if (abs (val) <= 127)
	    return (reg_no <= LAST_LO_REGNUM
		    || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
	  return FALSE;
	case E_V4HImode:
	case E_V4HFmode:
	  if (val % 2 == 0 && abs (val) <= 254)
	    return (reg_no <= LAST_LO_REGNUM
		    || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
	  return FALSE;
	case E_V4SImode:
	case E_V4SFmode:
	  if (val % 4 == 0 && abs (val) <= 508)
	    return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
		    || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
	  return FALSE;
	case E_V2DImode:
	case E_V2DFmode:
	case E_TImode:
	  if (val % 4 == 0 && val >= 0 && val <= 1020)
	    return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
		    || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
	  return FALSE;
	default:
	  return FALSE;
	}
    }
  return FALSE;
}

/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}

/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}

/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */
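
/* For example, with a V4SF value (GET_MODE_NUNITS / 4 == 1), lane 1 is
   rewritten below as lane 0 of the following D register, i.e. the
   D(n+1)[0] choice described above.  */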

void
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
}

/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}

/* Return GENERAL_REGS if a scratch register required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}

/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}

/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;
      /* Fall through.  */

    default:
      return 0;
    }
}

/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }

  return false;
}

enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}

/* Match pair of min/max operators that can be implemented via usat/ssat.  */
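
/* For instance, clamping to [0, 255] has hi_bound = 255 (log = 8) and
   lo_bound = 0, giving "usat #8"; clamping to [-128, 127] has
   lo_bound = -127 - 1, giving "ssat #8" (log + 1).  */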

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}

/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}

/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
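
/* As a concrete illustration, "ldmia r0, {r4, r5}" corresponds to
     [(set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))]
   which satisfies conditions 1 and 2 above.  */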

bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.  It
	     guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by write-back
	 or by explicit load.  If the pattern does not describe an update,
	 then the address register must be in the list of loaded registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}

/* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
   or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
   following form:

   [(set (reg:SI <N>) (const_int 0))
    (set (reg:SI <M>) (const_int 0))
    ...
    (unspec_volatile [(const_int 0)]
		     VUNSPEC_CLRM_APSR)
    (clobber (reg:CC CC_REGNUM))
   ]

   Any number (including 0) of set expressions is valid, the volatile unspec is
   optional.  All registers but SP and PC are allowed and registers must be in
   strict increasing order.

   To be a valid VSCCLRM pattern, OP must have the following form:

   [(unspec_volatile [(const_int 0)]
		     VUNSPEC_VSCCLRM_VPR)
    (set (reg:SF <N>) (const_int 0))
    (set (reg:SF <M>) (const_int 0))
    ...
   ]

   As with CLRM, any number (including 0) of set expressions is valid, however
   the volatile unspec is mandatory here.  Any VFP single-precision register is
   accepted but all registers must be consecutive and in increasing order.  */
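
/* For example, "clrm {r1, r2, APSR}" takes the form
     [(set (reg:SI 1) (const_int 0))
      (set (reg:SI 2) (const_int 0))
      (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
      (clobber (reg:CC CC_REGNUM))].  */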

bool
clear_operation_p (rtx op, bool vfp)
{
  unsigned regno;
  unsigned last_regno = INVALID_REGNUM;
  rtx elt, reg, zero;
  int count = XVECLEN (op, 0);
  int first_set = vfp ? 1 : 0;
  machine_mode expected_mode = vfp ? E_SFmode : E_SImode;

  for (int i = first_set; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);

      if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
	{
	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
	      || XVECLEN (elt, 0) != 1
	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode))
	    return false;

	  continue;
	}

      if (GET_CODE (elt) == CLOBBER)
	continue;

      if (GET_CODE (elt) != SET)
	return false;

      reg = SET_DEST (elt);
      zero = SET_SRC (elt);

      if (!REG_P (reg)
	  || GET_MODE (reg) != expected_mode
	  || zero != CONST0_RTX (SImode))
	return false;

      regno = REGNO (reg);

      if (vfp)
	{
	  if (i != first_set && regno != last_regno + 1)
	    return false;
	}
      else
	{
	  if (regno == SP_REGNUM || regno == PC_REGNUM)
	    return false;
	  if (i != first_set && regno <= last_regno)
	    return false;
	}

      last_regno = regno;
    }

  return true;
}

/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1-3
	  2		 2-6
	  3		 3-9
	  4		 4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}

/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
14210 int *unsorted_regs
)
14213 for (i
= 1; i
< nops
; i
++)
14217 order
[i
] = order
[i
- 1];
14218 for (j
= 0; j
< nops
; j
++)
14219 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
14221 /* We must find exactly one offset that is higher than the
14222 previous one by 4. */
14223 if (order
[i
] != order
[i
- 1])
14227 if (order
[i
] == order
[i
- 1])
14229 /* The register numbers must be ascending. */
14230 if (unsorted_regs
!= NULL
14231 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])

/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
14383 /* Used to determine in a peephole whether a sequence of store instructions can
14384 be changed into a store-multiple instruction.
14385 NOPS is the number of separate store instructions we are examining.
14386 NOPS_TOTAL is the total number of instructions recognized by the peephole
14388 The first NOPS entries in OPERANDS are the source registers, the next
14389 NOPS entries are memory operands. If this function is successful, *BASE is
14390 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14391 to the first memory location's offset from that base register. REGS is an
14392 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14393 likewise filled with the corresponding rtx's.
14394 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14395 numbers to an ascending order of stores.
14396 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14397 from ascending memory locations, and the function verifies that the register
14398 numbers are themselves ascending. If CHECK_REGS is false, the register
14399 numbers are stored in the order they are found in the operands. */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
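/* Illustrative sketch (hypothetical, not part of the build): a standalone
   model of the stm_case classification performed above.  The helper name
   and the int flags standing in for TARGET_ARM/TARGET_32BIT are invented
   for this example.  */
#if 0
static int
classify_stm_case (const long *sorted_offsets, int nops,
		   int target_arm, int target_32bit)
{
  if (sorted_offsets[0] == 0)
    return 1;	/* stmia: lowest offset is 0.  */
  if (target_arm && sorted_offsets[0] == 4)
    return 2;	/* stmib: lowest offset is 4.  */
  if (target_arm && sorted_offsets[nops - 1] == 0)
    return 3;	/* stmda: highest offset is 0.  */
  if (target_32bit && sorted_offsets[nops - 1] == -4)
    return 4;	/* stmdb: highest offset is -4.  */
  return 0;	/* No store-multiple form applies.  */
}
#endif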
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
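/* For instance (illustrative only), a call with COUNT == 2, REGS == {4, 5}
   and WBACK_OFFSET == 8 builds a PARALLEL of the form:

     (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
		(set (reg:SI 4) (mem:SI ...))
		(set (reg:SI 5) (mem:SI ...))])  */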
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
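/* Illustrative use (hypothetical BASEREG/BASEMEM values): emit a
   load-multiple of r0-r3 with base-register writeback, tracking
   MEM_OFFSET in OFFSET.  */
#if 0
{
  int regs[4] = { 0, 1, 2, 3 };
  HOST_WIDE_INT offset = 0;
  emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
				    basemem, &offset));
  /* Since WRITE_BACK was true, OFFSET has advanced by 16 bytes here.  */
}
#endif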
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */
bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */
bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
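/* Standalone model (hypothetical, not part of the build) of the ordering
   loop above: a selection sort filling REG_ORDER so that
   regs[reg_order[0]] <= regs[reg_order[1]] <= ...  */
#if 0
static void
compute_reg_order (const int *regs, int nops, int *reg_order)
{
  int i, j;

  /* Seed with the index of the smallest register number.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  /* Each step picks the smallest register strictly greater than the
     previous one.  */
  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }
}
#endif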
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
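/* Worked example: LENGTH == 11 with INTERLEAVE_FACTOR == 1 copies two
   4-byte blocks in the main loop, then one halfword and one trailing
   byte, ending with SRCOFFSET == DSTOFFSET == 11.  */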
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
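/* Scalar model (hypothetical, not part of the build) of the loop
   structure emitted above; memcpy stands in for the emitted body and
   <string.h> is assumed.  */
#if 0
static void
block_move_loop_model (char *dest, const char *src, long length,
		       long bytes_per_iter)
{
  long leftover = length % bytes_per_iter;
  const char *final_src = src + (length - leftover);

  while (src != final_src)	/* Loop condition: src_reg != final_src.  */
    {
      memcpy (dest, src, bytes_per_iter);	/* Loop body.  */
      src += bytes_per_iter;			/* Move on to the next block.  */
      dest += bytes_per_iter;
    }
  if (leftover)
    memcpy (dest, src, leftover);		/* Mop up.  */
}
#endif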
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */
static int
arm_cpymemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 tricky.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_cpymemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_cpymemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
   by its mode size.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
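/* For example, applying next_consecutive_mem to an SImode MEM whose
   address is (reg) yields an SImode MEM at (plus (reg) (const_int 4)),
   with MEM_OFFSET advanced by the 4-byte mode size.  */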
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_cpymem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_cpymemqi (operands);

  /* If either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
	}
      if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
	emit_move_insn (reg0, src);
      else if (src_aligned)
	emit_insn (gen_unaligned_loaddi (reg0, src));
      else
	{
	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
	}

      if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
	emit_move_insn (dst, reg0);
      else if (dst_aligned)
	emit_insn (gen_unaligned_storedi (dst, reg0));
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
      if (len == 0)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Decompose operands for a 64-bit binary operation in OP1 and OP2
   into its component 32-bit subregs.  OP2 may be an immediate
   constant and we want to simplify it in that case.  */
void
arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
			rtx *lo_op2, rtx *hi_op2)
{
  *lo_op1 = gen_lowpart (SImode, op1);
  *hi_op1 = gen_highpart (SImode, op1);
  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_lowpart_offset (SImode, DImode));
  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_highpart_offset (SImode, DImode));
}
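/* For example, with OP2 == (const_int 0x300000002) the subregs simplify
   to *LO_OP2 == (const_int 2) and *HI_OP2 == (const_int 3).  */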
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
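/* Example: for a test of (EQ (x) || LE (y)), EQ is dominated by LE
   (EQ true implies LE true), so the pair collapses to CC_DLEmode and a
   single conditional-compare sequence can test both.  */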
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* A widened compare of the sum of a value plus a carry against a
     constant.  This is a representation of RSC.  We want to swap the
     result of the comparison at output.  Not valid if the Z bit is
     needed.  */
  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && arm_borrow_operation (XEXP (x, 1), DImode)
      && CONST_INT_P (y)
      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	   && (op == LE || op == GT))
	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      && (op == LEU || op == GTU))))
    return CC_SWPmode;

  /* If X is a constant we want to use CC_RSBmode.  This is
     non-canonical, but arm_gen_compare_reg uses this to generate the
     correct canonical form.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && CONST_INT_P (x))
    return CC_RSBmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NZmode;

  /* A comparison of ~reg with a const is really a special
     canonicalization of compare (~const, reg), which is a reverse
     subtract operation.  We may not get here if CONST is 0, but that
     doesn't matter because ~0 isn't a valid immediate for RSB.  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == NOT
      && CONST_INT_P (y))
    return CC_RSBmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
      && CONST_INT_P (y)
      && UINTVAL (y) == 0x800000000
      && (op == GEU || op == LTU))
    return CC_ADCmode;

  if (GET_MODE (x) == DImode
      && (op == GE || op == LT)
      && GET_CODE (x) == SIGN_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_NVmode;

  if (GET_MODE (x) == DImode
      && (op == GEU || op == LTU)
      && GET_CODE (x) == ZERO_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_Bmode;

  if (GET_MODE (x) == DImode
      && (op == EQ || op == NE)
      && (GET_CODE (x) == PLUS
	  || GET_CODE (x) == MINUS)
      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
      && GET_CODE (y) == SIGN_EXTEND
      && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
    return CC_Vmode;

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two (DImode) things to compare for the condition CODE.  Emit
   the sequence of instructions needed to generate a suitable condition
   code register.  Return the CC register result.  */
static rtx
arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;

  /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
  gcc_assert (TARGET_32BIT);
  gcc_assert (!CONST_INT_P (x));

  rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
				  subreg_highpart_offset (SImode, DImode));
  rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
				  subreg_highpart_offset (SImode, DImode));

  switch (code)
    {
    case EQ:
    case NE:
      {
	if (y_lo == const0_rtx || y_hi == const0_rtx)
	  {
	    if (y_lo != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		gcc_assert (y_hi == const0_rtx);
		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
		if (!arm_add_operand (y_lo, SImode))
		  y_lo = force_reg (SImode, y_lo);
		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
		x_lo = scratch2;
	      }
	    else if (y_hi != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
		if (!arm_add_operand (y_hi, SImode))
		  y_hi = force_reg (SImode, y_hi);
		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
		x_hi = scratch2;
	      }

	    if (!scratch)
	      {
		gcc_assert (!reload_completed);
		scratch = gen_rtx_SCRATCH (SImode);
	      }

	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

	    rtx set
	      = gen_rtx_SET (cc_reg,
			     gen_rtx_COMPARE (CC_NZmode,
					      gen_rtx_IOR (SImode, x_lo, x_hi),
					      const0_rtx));
	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
							      clobber)));
	    return cc_reg;
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	if (!arm_add_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
	cc_reg = gen_rtx_REG (mode, CC_REGNUM);

	emit_insn (gen_rtx_SET (cc_reg,
				gen_rtx_COMPARE (mode, conjunction,
						 const0_rtx)));
	return cc_reg;
      }

    case LT:
    case GE:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);

	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
							   cmp1));
	else if (CONST_INT_P (y_hi))
	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
							     y_hi, cmp1));
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
							 cmp1));
	return SET_DEST (single_set (insn));
      }

    case LE:
    case GT:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
								 x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    case LTU:
    case GEU:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);
	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
							  cmp1));
	else if (CONST_INT_P (y_hi))
	  {
	    /* Constant is viewed as unsigned when zero-extended.  */
	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
							      y_hi, cmp1));
	  }
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
							cmp1));
	return SET_DEST (single_set (insn));
      }

    case LEU:
    case GTU:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
								x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    default:
      gcc_unreachable ();
    }
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    return arm_gen_dicompare_reg (code, x, y, scratch);

  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
  if (mode == CC_RSBmode)
    {
      if (!scratch)
	scratch = gen_rtx_SCRATCH (SImode);
      emit_insn (gen_rsb_imm_compare_scratch (scratch,
					      GEN_INT (~UINTVAL (x)), y));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
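/* Standalone model (hypothetical, not part of the build) of the hi/lo
   address split used above: LO keeps the low 12 bits (negated range for
   negative offsets), the 4095 corner case is trimmed so LO + 1 stays in
   range, and HI takes the remainder, sign-extended to 32 bits, so that
   HI + LO == OFFSET always holds.  */
#if 0
static long long
split_offset (long long offset, long long *lo_out)
{
  long long lo = (offset >= 0
		  ? (offset & 0xfff)
		  : -((-offset) & 0xfff));
  if (lo == 4095)
    lo &= 0x7ff;	/* Leave room for the +1 of the second byte.  */
  long long hi = (((offset - lo) & 0xffffffffLL) ^ 0x80000000LL)
		 - 0x80000000LL;
  *lo_out = lo;
  return hi;	/* E.g. split_offset (4095, &lo) gives hi == 2048, lo == 2047.  */
}
#endif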
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
16563 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16564 assuming that the address in the base register is word aligned.  */
16566 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16568 HOST_WIDE_INT max_offset;
16570 /* Offset must be a multiple of 4 in Thumb mode.  */
16571 if (TARGET_THUMB2 && ((offset & 3) != 0))
16576 else if (TARGET_ARM)
16581 return ((offset <= max_offset) && (offset >= -max_offset));
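/* Usage sketch (illustrative; it assumes the elided branches above set
   max_offset to 1020 for Thumb-2 and 255 for ARM mode):

     offset_ok_for_ldrd_strd (248)   -> true in both modes
     offset_ok_for_ldrd_strd (-2)    -> false on Thumb-2 (not a multiple of 4)
     offset_ok_for_ldrd_strd (1020)  -> true on Thumb-2 only  */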
16584 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16585 Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
16586 Assumes that the address in the base register RN is word aligned.  The pattern
16587 guarantees that both memory accesses use the same base register, that
16588 the offsets are constants within range, and that the gap between the offsets is 4.
16589 Once reload is complete, checks that the registers are legal.  WBACK indicates whether the
16590 address is updated.  LOAD indicates whether the memory access is a load or a store.  */
16592 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16593 bool wback, bool load)
16595 unsigned int t, t2, n;
16597 if (!reload_completed)
16600 if (!offset_ok_for_ldrd_strd (offset))
16607 if ((TARGET_THUMB2)
16608 && ((wback && (n == t || n == t2))
16609 || (t == SP_REGNUM)
16610 || (t == PC_REGNUM)
16611 || (t2 == SP_REGNUM)
16612 || (t2 == PC_REGNUM)
16613 || (!load && (n == PC_REGNUM))
16614 || (load && (t == t2))
16615 /* Triggers Cortex-M3 LDRD errata.  */
16616 || (!wback && load && fix_cm3_ldrd && (n == t))))
16620 && ((wback && (n == t || n == t2))
16621 || (t2 == PC_REGNUM)
16622 || (t % 2 != 0)   /* First destination register is not even.  */
16624 /* PC can be used as base register (for offset addressing only),
16625 but it is deprecated.  */
16626 || (n == PC_REGNUM)))
16632 /* Return true if a 64-bit access with alignment ALIGN and with a
16633 constant offset OFFSET from the base pointer is permitted on this
16636 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16638 return (unaligned_access
16639 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16640 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
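/* Illustrative values (not in the original source), with BITS_PER_WORD == 32:

     align_ok_ldrd_strd (32, 4)  -> true only if unaligned_access is set
     align_ok_ldrd_strd (64, 8)  -> true in either case

   i.e. with strict alignment the object must be doubleword aligned and
   the offset a multiple of 8.  */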
16643 /* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
16644 operand MEM's address contains an immediate offset from the base
16645 register and has no side effects, in which case it sets BASE,
16646 OFFSET and ALIGN accordingly.  */
16648 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16652 gcc_assert (base != NULL && offset != NULL);
16654 /* TODO: Handle more general memory operand patterns, such as
16655 PRE_DEC and PRE_INC.  */
16657 if (side_effects_p (mem))
16660 /* Can't deal with subregs.  */
16661 if (GET_CODE (mem) == SUBREG)
16664 gcc_assert (MEM_P (mem));
16666 *offset = const0_rtx;
16667 *align = MEM_ALIGN (mem);
16669 addr = XEXP (mem, 0);
16671 /* If addr isn't valid for DImode, then we can't handle it.  */
16672 if (!arm_legitimate_address_p (DImode, addr,
16673 reload_in_progress || reload_completed))
16681 else if (GET_CODE (addr) == PLUS)
16683 *base = XEXP (addr, 0);
16684 *offset = XEXP (addr, 1);
16685 return (REG_P (*base) && CONST_INT_P (*offset));
16691 /* Called from a peephole2 to replace two word-size accesses with a
16692 single LDRD/STRD instruction.  Returns true iff we can generate a
16693 new instruction sequence.  That is, both accesses use the same base
16694 register and the gap between constant offsets is 4.  This function
16695 may reorder its operands to match ldrd/strd RTL templates.
16696 OPERANDS are the operands found by the peephole matcher;
16697 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16698 corresponding memory operands.  LOAD indicates whether the access
16699 is a load or a store.  CONST_STORE indicates a store of constant
16700 integer values held in OPERANDS[4,5] and assumes that the pattern
16701 is four insns long, for the purpose of checking dead registers.
16702 COMMUTE indicates that register operands may be reordered.  */
16704 gen_operands_ldrd_strd (rtx *operands, bool load,
16705 bool const_store, bool commute)
16708 HOST_WIDE_INT offsets[2], offset, align[2];
16709 rtx base = NULL_RTX;
16710 rtx cur_base, cur_offset, tmp;
16712 HARD_REG_SET regset;
16714 gcc_assert (!const_store || !load);
16715 /* Check that the memory references are immediate offsets from the
16716 same base register.  Extract the base register, the destination
16717 registers, and the corresponding memory offsets.  */
16718 for (i = 0; i < nops; i++)
16720 if (!mem_ok_for_ldrd_strd (operands[nops + i], &cur_base, &cur_offset,
16726 else if (REGNO (base) != REGNO (cur_base))
16729 offsets[i] = INTVAL (cur_offset);
16730 if (GET_CODE (operands[i]) == SUBREG)
16732 tmp = SUBREG_REG (operands[i]);
16733 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16738 /* Make sure there is no dependency between the individual loads.  */
16739 if (load && REGNO (operands[0]) == REGNO (base))
16740 return false; /* RAW */
16742 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16743 return false; /* WAW */
16745 /* If the same input register is used in both stores
16746 when storing different constants, try to find a free register.
16747 For example, the code
16752 can be transformed into
16756 in Thumb mode assuming that r1 is free.
16757 For ARM mode do the same but only if the starting register
16758 can be made to be even.  */
16760 && REGNO (operands[0]) == REGNO (operands[1])
16761 && INTVAL (operands[4]) != INTVAL (operands[5]))
16765 CLEAR_HARD_REG_SET (regset);
16766 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16767 if (tmp == NULL_RTX)
16770 /* Use the new register in the first load to ensure that
16771 if the original input register is not dead after peephole,
16772 then it will have the correct constant value.  */
16775 else if (TARGET_ARM)
16777 int regno = REGNO (operands[0]);
16778 if (!peep2_reg_dead_p (4, operands[0]))
16780 /* When the input register is even and is not dead after the
16781 pattern, it has to hold the second constant but we cannot
16782 form a legal STRD in ARM mode with this register as the second
16784 if (regno % 2 == 0)
16787 /* Is regno-1 free?  */
16788 SET_HARD_REG_SET (regset);
16789 CLEAR_HARD_REG_BIT (regset, regno - 1);
16790 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16791 if (tmp == NULL_RTX)
16798 /* Find a DImode register.  */
16799 CLEAR_HARD_REG_SET (regset);
16800 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16801 if (tmp != NULL_RTX)
16803 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16804 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16808 /* Can we use the input register to form a DI register?  */
16809 SET_HARD_REG_SET (regset);
16810 CLEAR_HARD_REG_BIT (regset,
16811 regno % 2 == 0 ? regno + 1 : regno - 1);
16812 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16813 if (tmp == NULL_RTX)
16815 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16819 gcc_assert (operands[0] != NULL_RTX);
16820 gcc_assert (operands[1] != NULL_RTX);
16821 gcc_assert (REGNO (operands[0]) % 2 == 0);
16822 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16826 /* Make sure the instructions are ordered with lower memory access first.  */
16827 if (offsets[0] > offsets[1])
16829 gap = offsets[0] - offsets[1];
16830 offset = offsets[1];
16832 /* Swap the instructions such that lower memory is accessed first.  */
16833 std::swap (operands[0], operands[1]);
16834 std::swap (operands[2], operands[3]);
16835 std::swap (align[0], align[1]);
16837 std::swap (operands[4], operands[5]);
16841 gap = offsets[1] - offsets[0];
16842 offset = offsets[0];
16845 /* Make sure accesses are to consecutive memory locations.  */
16846 if (gap != GET_MODE_SIZE (SImode))
16849 if (!align_ok_ldrd_strd (align[0], offset))
16852 /* Make sure we generate legal instructions.  */
16853 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16857 /* In Thumb state, where registers are almost unconstrained, there
16858 is little hope to fix it.  */
16862 if (load && commute)
16864 /* Try reordering registers.  */
16865 std::swap (operands[0], operands[1]);
16866 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16873 /* If input registers are dead after this pattern, they can be
16874 reordered or replaced by other registers that are free in the
16875 current pattern.  */
16876 if (!peep2_reg_dead_p (4, operands[0])
16877 || !peep2_reg_dead_p (4, operands[1]))
16880 /* Try to reorder the input registers.  */
16881 /* For example, the code
16886 can be transformed into
16891 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16894 std::swap (operands[0], operands[1]);
16898 /* Try to find a free DI register.  */
16899 CLEAR_HARD_REG_SET (regset);
16900 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16901 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16904 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16905 if (tmp == NULL_RTX)
16908 /* DREG must be an even-numbered register in DImode.
16909 Split it into SI registers.  */
16910 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16911 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16912 gcc_assert (operands[0] != NULL_RTX);
16913 gcc_assert (operands[1] != NULL_RTX);
16914 gcc_assert (REGNO (operands[0]) % 2 == 0);
16915 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16917 return (operands_ok_ldrd_strd (operands[0], operands[1],
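/* Sketch of the rewrite this function validates (illustrative only):

     ldr r0, [r2]          -->   ldrd r0, r1, [r2]
     ldr r1, [r2, #4]

   two word accesses at consecutive offsets from the same base register
   collapse into one LDRD/STRD, subject to the register checks above.  */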
16927 /* Return true if parallel execution of the two word-size accesses provided
16928 could be satisfied with a single LDRD/STRD instruction.  Two word-size
16929 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16930 register operands and OPERANDS[2,3] are the corresponding memory operands.
16933 valid_operands_ldrd_strd (rtx *operands, bool load)
16936 HOST_WIDE_INT offsets[2], offset, align[2];
16937 rtx base = NULL_RTX;
16938 rtx cur_base, cur_offset;
16941 /* Check that the memory references are immediate offsets from the
16942 same base register.  Extract the base register, the destination
16943 registers, and the corresponding memory offsets.  */
16944 for (i = 0; i < nops; i++)
16946 if (!mem_ok_for_ldrd_strd (operands[nops + i], &cur_base, &cur_offset,
16952 else if (REGNO (base) != REGNO (cur_base))
16955 offsets[i] = INTVAL (cur_offset);
16956 if (GET_CODE (operands[i]) == SUBREG)
16960 if (offsets[0] > offsets[1])
16963 gap = offsets[1] - offsets[0];
16964 offset = offsets[0];
16966 /* Make sure accesses are to consecutive memory locations.  */
16967 if (gap != GET_MODE_SIZE (SImode))
16970 if (!align_ok_ldrd_strd (align[0], offset))
16973 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16978 /* Print a symbolic form of X to the debug file, F.  */
16980 arm_print_value (FILE *f, rtx x)
16982 switch (GET_CODE (x))
16985 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16991 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16992 sizeof (fpstr), 0, 1);
17002 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17004 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17005 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17013 fprintf (f, "\"%s\"", XSTR (x, 0));
17017 fprintf (f, "`%s'", XSTR (x, 0));
17021 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17025 arm_print_value (f, XEXP (x, 0));
17029 arm_print_value (f, XEXP (x, 0));
17031 arm_print_value (f, XEXP (x, 1));
17039 fprintf (f, "????");
17044 /* Routines for manipulation of the constant pool. */
17046 /* Arm instructions cannot load a large constant directly into a
17047 register; they have to come from a pc relative load. The constant
17048 must therefore be placed in the addressable range of the pc
17049 relative load. Depending on the precise pc relative load
17050 instruction the range is somewhere between 256 bytes and 4k. This
17051 means that we often have to dump a constant inside a function, and
17052 generate code to branch around it.
17054 It is important to minimize this, since the branches will slow
17055 things down and make the code larger.
17057 Normally we can hide the table after an existing unconditional
17058 branch so that there is no interruption of the flow, but in the
17059 worst case the code looks like this:
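   (illustrative sketch; the original example was elided here)

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long	value
	L2:
	...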
17077 We fix this by performing a scan after scheduling, which notices
17078 which instructions need to have their operands fetched from the
17079 constant table and builds the table.
17081 The algorithm starts by building a table of all the constants that
17082 need fixing up and all the natural barriers in the function (places
17083 where a constant table can be dropped without breaking the flow).
17084 For each fixup we note how far the pc-relative replacement will be
17085 able to reach and the offset of the instruction into the function.
17087 Having built the table we then group the fixes together to form
17088 tables that are as large as possible (subject to addressing
17089 constraints) and emit each table of constants after the last
17090 barrier that is within range of all the instructions in the group.
17091 If a group does not contain a barrier, then we forcibly create one
17092 by inserting a jump instruction into the flow. Once the table has
17093 been inserted, the insns are then modified to reference the
17094 relevant entry in the pool.
17096 Possible enhancements to the algorithm (not implemented) are:
17098 1) For some processors and object formats, there may be benefit in
17099 aligning the pools to the start of cache lines; this alignment
17100 would need to be taken into account when calculating addressability
17103 /* These typedefs are located at the start of this file, so that
17104 they can be used in the prototypes there. This comment is to
17105 remind readers of that fact so that the following structures
17106 can be understood more easily.
17108 typedef struct minipool_node Mnode;
17109 typedef struct minipool_fixup Mfix; */
17111 struct minipool_node
17113 /* Doubly linked chain of entries.  */
17116 /* The maximum offset into the code that this entry can be placed.  While
17117 pushing fixes for forward references, all entries are sorted in order
17118 of increasing max_address.  */
17119 HOST_WIDE_INT max_address;
17120 /* Similarly for an entry inserted for a backwards ref.  */
17121 HOST_WIDE_INT min_address;
17122 /* The number of fixes referencing this entry.  This can become zero
17123 if we "unpush" an entry.  In this case we ignore the entry when we
17124 come to emit the code.  */
17126 /* The offset from the start of the minipool.  */
17127 HOST_WIDE_INT offset;
17128 /* The value in the table.  */
17130 /* The mode of the value.  */
17132 /* The size of the value.  With iWMMXt enabled,
17133 sizes > 4 also imply an alignment of 8 bytes.  */
17137 struct minipool_fixup
17141 HOST_WIDE_INT address;
17147 HOST_WIDE_INT forwards;
17148 HOST_WIDE_INT backwards;
17151 /* Fixes less than a word need padding out to a word boundary. */
17152 #define MINIPOOL_FIX_SIZE(mode) \
17153 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
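/* For instance (illustrative): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, since sub-word entries
   are padded out to a full word, while MINIPOOL_FIX_SIZE (DImode) is 8.  */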
17155 static Mnode *minipool_vector_head;
17156 static Mnode *minipool_vector_tail;
17157 static rtx_code_label *minipool_vector_label;
17158 static int minipool_pad;
17160 /* The linked list of all minipool fixes required for this function.  */
17161 Mfix *minipool_fix_head;
17162 Mfix *minipool_fix_tail;
17163 /* The fix entry for the current minipool, once it has been placed.  */
17164 Mfix *minipool_barrier;
17166 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17167 #define JUMP_TABLES_IN_TEXT_SECTION 0
17170 static HOST_WIDE_INT
17171 get_jump_table_size (rtx_jump_table_data *insn)
17173 /* ADDR_VECs only take room if read-only data goes into the text section.  */
17175 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17177 rtx body = PATTERN (insn);
17178 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17179 HOST_WIDE_INT size;
17180 HOST_WIDE_INT modesize;
17182 modesize = GET_MODE_SIZE (GET_MODE (body));
17183 size = modesize * XVECLEN (body, elt);
17187 /* Round up size of TBB table to a halfword boundary.  */
17188 size = (size + 1) & ~HOST_WIDE_INT_1;
17191 /* No padding necessary for TBH.  */
17194 /* Add two bytes for alignment on Thumb.  */
17199 gcc_unreachable ();
17207 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17208 function descriptor) into a register and the GOT address into the
17209 FDPIC register, returning an rtx for the register holding the
17210 function address.  */
17213 arm_load_function_descriptor (rtx funcdesc)
17215 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17216 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17217 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17218 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17220 emit_move_insn (fnaddr_reg, fnaddr);
17222 /* The ABI requires the entry point address to be loaded first, but
17223 since we cannot support lazy binding for lack of an atomic load of
17224 two 32-bit values, we do not need to bother to prevent the
17225 previous load from being moved after that of the GOT address.  */
17226 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17231 /* Return the maximum amount of padding that will be inserted before LABEL.  */
17233 static HOST_WIDE_INT
17234 get_label_padding (rtx label)
17236 HOST_WIDE_INT align, min_insn_size;
17238 align = 1 << label_to_alignment (label).levels[0].log;
17239 min_insn_size = TARGET_THUMB ? 2 : 4;
17240 return align > min_insn_size ? align - min_insn_size : 0;
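/* Worked example (illustrative): for a label aligned to 8 bytes
   (levels[0].log == 3) on Thumb, align == 8 and min_insn_size == 2, so
   at most 8 - 2 == 6 bytes of padding can precede the label.  */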
17243 /* Move a minipool fix MP from its current location to before MAX_MP.
17244 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17245 constraints may need updating.  */
17247 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17248 HOST_WIDE_INT max_address)
17250 /* The code below assumes these are different.  */
17251 gcc_assert (mp != max_mp);
17253 if (max_mp == NULL)
17255 if (max_address < mp->max_address)
17256 mp->max_address = max_address;
17260 if (max_address > max_mp->max_address - mp->fix_size)
17261 mp->max_address = max_mp->max_address - mp->fix_size;
17263 mp->max_address = max_address;
17265 /* Unlink MP from its current position.  Since max_mp is non-null,
17266 mp->prev must be non-null.  */
17267 mp->prev->next = mp->next;
17268 if (mp->next != NULL)
17269 mp->next->prev = mp->prev;
17271 minipool_vector_tail = mp->prev;
17273 /* Re-insert it before MAX_MP.  */
17275 mp->prev = max_mp->prev;
17278 if (mp->prev != NULL)
17279 mp->prev->next = mp;
17281 minipool_vector_head = mp;
17284 /* Save the new entry.  */
17287 /* Scan over the preceding entries and adjust their addresses as
17289 while (mp->prev != NULL
17290 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17292 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17299 /* Add a constant to the minipool for a forward reference.  Returns the
17300 node added or NULL if the constant will not fit in this pool.  */
17302 add_minipool_forward_ref (Mfix *fix)
17304 /* If set, max_mp is the first pool_entry that has a lower
17305 constraint than the one we are trying to add.  */
17306 Mnode *max_mp = NULL;
17307 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17310 /* If the minipool starts before the end of FIX->INSN then this FIX
17311 cannot be placed into the current pool.  Furthermore, adding the
17312 new constant pool entry may cause the pool to start FIX_SIZE bytes
17314 if (minipool_vector_head &&
17315 (fix->address + get_attr_length (fix->insn)
17316 >= minipool_vector_head->max_address - fix->fix_size))
17319 /* Scan the pool to see if a constant with the same value has
17320 already been added.  While we are doing this, also note the
17321 location where we must insert the constant if it doesn't already
17323 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17325 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17326 && fix->mode == mp->mode
17327 && (!LABEL_P (fix->value)
17328 || (CODE_LABEL_NUMBER (fix->value)
17329 == CODE_LABEL_NUMBER (mp->value)))
17330 && rtx_equal_p (fix->value, mp->value))
17332 /* More than one fix references this entry.  */
17334 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17337 /* Note the insertion point if necessary.  */
17339 && mp->max_address > max_address)
17342 /* If we are inserting an 8-byte aligned quantity and
17343 we have not already found an insertion point, then
17344 make sure that all such 8-byte aligned quantities are
17345 placed at the start of the pool.  */
17346 if (ARM_DOUBLEWORD_ALIGN
17348 && fix->fix_size >= 8
17349 && mp->fix_size < 8)
17352 max_address = mp->max_address;
17356 /* The value is not currently in the minipool, so we need to create
17357 a new entry for it.  If MAX_MP is NULL, the entry will be put on
17358 the end of the list since the placement is less constrained than
17359 any existing entry.  Otherwise, we insert the new fix before
17360 MAX_MP and, if necessary, adjust the constraints on the other
17363 mp->fix_size = fix->fix_size;
17364 mp->mode = fix->mode;
17365 mp->value = fix->value;
17367 /* Not yet required for a backwards ref.  */
17368 mp->min_address = -65536;
17370 if (max_mp == NULL)
17372 mp->max_address = max_address;
17374 mp->prev = minipool_vector_tail;
17376 if (mp->prev == NULL)
17378 minipool_vector_head = mp;
17379 minipool_vector_label = gen_label_rtx ();
17382 mp->prev->next = mp;
17384 minipool_vector_tail = mp;
17388 if (max_address > max_mp->max_address - mp->fix_size)
17389 mp->max_address = max_mp->max_address - mp->fix_size;
17391 mp->max_address = max_address;
17394 mp->prev = max_mp->prev;
17396 if (mp->prev != NULL)
17397 mp->prev->next = mp;
17399 minipool_vector_head = mp;
17402 /* Save the new entry.  */
17405 /* Scan over the preceding entries and adjust their addresses as
17407 while (mp->prev != NULL
17408 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17410 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17418 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17419 HOST_WIDE_INT min_address)
17421 HOST_WIDE_INT offset;
17423 /* The code below assumes these are different.  */
17424 gcc_assert (mp != min_mp);
17426 if (min_mp == NULL)
17428 if (min_address > mp->min_address)
17429 mp->min_address = min_address;
17433 /* We will adjust this below if it is too loose.  */
17434 mp->min_address = min_address;
17436 /* Unlink MP from its current position.  Since min_mp is non-null,
17437 mp->next must be non-null.  */
17438 mp->next->prev = mp->prev;
17439 if (mp->prev != NULL)
17440 mp->prev->next = mp->next;
17442 minipool_vector_head = mp->next;
17444 /* Reinsert it after MIN_MP.  */
17446 mp->next = min_mp->next;
17448 if (mp->next != NULL)
17449 mp->next->prev = mp;
17451 minipool_vector_tail = mp;
17457 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17459 mp->offset = offset;
17460 if (mp->refcount > 0)
17461 offset += mp->fix_size;
17463 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17464 mp->next->min_address = mp->min_address + mp->fix_size;
17470 /* Add a constant to the minipool for a backward reference.  Returns the
17471 node added or NULL if the constant will not fit in this pool.
17473 Note that the code for insertion for a backwards reference can be
17474 somewhat confusing because the calculated offsets for each fix do
17475 not take into account the size of the pool (which is still under
17478 add_minipool_backward_ref (Mfix *fix)
17480 /* If set, min_mp is the last pool_entry that has a lower constraint
17481 than the one we are trying to add.  */
17482 Mnode *min_mp = NULL;
17483 /* This can be negative, since it is only a constraint.  */
17484 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17487 /* If we can't reach the current pool from this insn, or if we can't
17488 insert this entry at the end of the pool without pushing other
17489 fixes out of range, then we don't try.  This ensures that we
17490 can't fail later on.  */
17491 if (min_address >= minipool_barrier->address
17492 || (minipool_vector_tail->min_address + fix->fix_size
17493 >= minipool_barrier->address))
17496 /* Scan the pool to see if a constant with the same value has
17497 already been added.  While we are doing this, also note the
17498 location where we must insert the constant if it doesn't already
17500 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17502 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17503 && fix->mode == mp->mode
17504 && (!LABEL_P (fix->value)
17505 || (CODE_LABEL_NUMBER (fix->value)
17506 == CODE_LABEL_NUMBER (mp->value)))
17507 && rtx_equal_p (fix->value, mp->value)
17508 /* Check that there is enough slack to move this entry to the
17509 end of the table (this is conservative).  */
17510 && (mp->max_address
17511 > (minipool_barrier->address
17512 + minipool_vector_tail->offset
17513 + minipool_vector_tail->fix_size)))
17516 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17519 if (min_mp != NULL)
17520 mp->min_address += fix->fix_size;
17523 /* Note the insertion point if necessary.  */
17524 if (mp->min_address < min_address)
17526 /* For now, we do not allow the insertion of 8-byte alignment
17527 requiring nodes anywhere but at the start of the pool.  */
17528 if (ARM_DOUBLEWORD_ALIGN
17529 && fix->fix_size >= 8 && mp->fix_size < 8)
17534 else if (mp->max_address
17535 < minipool_barrier->address + mp->offset + fix->fix_size)
17537 /* Inserting before this entry would push the fix beyond
17538 its maximum address (which can happen if we have
17539 re-located a forwards fix); force the new fix to come
17541 if (ARM_DOUBLEWORD_ALIGN
17542 && fix->fix_size >= 8 && mp->fix_size < 8)
17547 min_address = mp->min_address + fix->fix_size;
17550 /* Do not insert a non-8-byte aligned quantity before 8-byte
17551 aligned quantities.  */
17552 else if (ARM_DOUBLEWORD_ALIGN
17553 && fix->fix_size < 8
17554 && mp->fix_size >= 8)
17557 min_address = mp->min_address + fix->fix_size;
17562 /* We need to create a new entry.  */
17564 mp->fix_size = fix->fix_size;
17565 mp->mode = fix->mode;
17566 mp->value = fix->value;
17568 mp->max_address = minipool_barrier->address + 65536;
17570 mp->min_address = min_address;
17572 if (min_mp == NULL)
17575 mp->next = minipool_vector_head;
17577 if (mp->next == NULL)
17579 minipool_vector_tail = mp;
17580 minipool_vector_label = gen_label_rtx ();
17583 mp->next->prev = mp;
17585 minipool_vector_head = mp;
17589 mp->next = min_mp->next;
17593 if (mp->next != NULL)
17594 mp->next->prev = mp;
17596 minipool_vector_tail = mp;
17599 /* Save the new entry.  */
17607 /* Scan over the following entries and adjust their offsets.  */
17608 while (mp->next != NULL)
17610 if (mp->next->min_address < mp->min_address + mp->fix_size)
17611 mp->next->min_address = mp->min_address + mp->fix_size;
17614 mp->next->offset = mp->offset + mp->fix_size;
17616 mp->next->offset = mp->offset;
17625 assign_minipool_offsets (Mfix *barrier)
17627 HOST_WIDE_INT offset = 0;
17630 minipool_barrier = barrier;
17632 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17634 mp->offset = offset;
17636 if (mp->refcount > 0)
17637 offset += mp->fix_size;
17641 /* Output the literal table.  */
17643 dump_minipool (rtx_insn *scan)
17649 if (ARM_DOUBLEWORD_ALIGN)
17650 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17651 if (mp->refcount > 0 && mp->fix_size >= 8)
17658 fprintf (dump_file,
17659 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17660 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17662 scan = emit_label_after (gen_label_rtx (), scan);
17663 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17664 scan = emit_label_after (minipool_vector_label, scan);
17666 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17668 if (mp->refcount > 0)
17672 fprintf (dump_file,
17673 ";; Offset %u, min %ld, max %ld ",
17674 (unsigned) mp->offset, (unsigned long) mp->min_address,
17675 (unsigned long) mp->max_address);
17676 arm_print_value (dump_file, mp->value);
17677 fputc ('\n', dump_file);
17680 rtx val = copy_rtx (mp->value);
17682 switch (GET_MODE_SIZE (mp->mode))
17684 #ifdef HAVE_consttable_1
17686 scan = emit_insn_after (gen_consttable_1 (val), scan);
17690 #ifdef HAVE_consttable_2
17692 scan = emit_insn_after (gen_consttable_2 (val), scan);
17696 #ifdef HAVE_consttable_4
17698 scan = emit_insn_after (gen_consttable_4 (val), scan);
17702 #ifdef HAVE_consttable_8
17704 scan = emit_insn_after (gen_consttable_8 (val), scan);
17708 #ifdef HAVE_consttable_16
17710 scan = emit_insn_after (gen_consttable_16 (val), scan);
17715 gcc_unreachable ();
17723 minipool_vector_head = minipool_vector_tail = NULL;
17724 scan = emit_insn_after (gen_consttable_end (), scan);
17725 scan = emit_barrier_after (scan);
17728 /* Return the cost of forcibly inserting a barrier after INSN.  */
17730 arm_barrier_cost (rtx_insn *insn)
17732 /* Basing the location of the pool on the loop depth is preferable,
17733 but at the moment, the basic block information seems to be
17734 corrupted by this stage of the compilation.  */
17735 int base_cost = 50;
17736 rtx_insn *next = next_nonnote_insn (insn);
17738 if (next != NULL && LABEL_P (next))
17741 switch (GET_CODE (insn))
17744 /* It will always be better to place the table before the label, rather
than after it.  */
17753 return base_cost - 10;
17756 return base_cost + 10;
17760 /* Find the best place in the insn stream in the range
17761 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17762 Create the barrier by inserting a jump and add a new fix entry for
17765 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17767 HOST_WIDE_INT count = 0;
17768 rtx_barrier *barrier;
17769 rtx_insn *from = fix->insn;
17770 /* The instruction after which we will insert the jump.  */
17771 rtx_insn *selected = NULL;
17773 /* The address at which the jump instruction will be placed.  */
17774 HOST_WIDE_INT selected_address;
17776 HOST_WIDE_INT max_count = max_address - fix->address;
17777 rtx_code_label *label = gen_label_rtx ();
17779 selected_cost = arm_barrier_cost (from);
17780 selected_address = fix->address;
17782 while (from && count < max_count)
17784 rtx_jump_table_data *tmp;
17787 /* This code shouldn't have been called if there was a natural barrier
17789 gcc_assert (!BARRIER_P (from));
17791 /* Count the length of this insn.  This must stay in sync with the
17792 code that pushes minipool fixes.  */
17793 if (LABEL_P (from))
17794 count += get_label_padding (from);
17796 count += get_attr_length (from);
17798 /* If there is a jump table, add its length.  */
17799 if (tablejump_p (from, NULL, &tmp))
17801 count += get_jump_table_size (tmp);
17803 /* Jump tables aren't in a basic block, so base the cost on
17804 the dispatch insn.  If we select this location, we will
17805 still put the pool after the table.  */
17806 new_cost = arm_barrier_cost (from);
17808 if (count < max_count
17809 && (!selected || new_cost <= selected_cost))
17812 selected_cost = new_cost;
17813 selected_address = fix->address + count;
17816 /* Continue after the dispatch table.  */
17817 from = NEXT_INSN (tmp);
17821 new_cost = arm_barrier_cost (from);
17823 if (count < max_count
17824 && (!selected || new_cost <= selected_cost))
17827 selected_cost = new_cost;
17828 selected_address = fix->address + count;
17831 from = NEXT_INSN (from);
17834 /* Make sure that we found a place to insert the jump.  */
17835 gcc_assert (selected);
17837 /* Create a new JUMP_INSN that branches around a barrier.  */
17838 from = emit_jump_insn_after (gen_jump (label), selected);
17839 JUMP_LABEL (from) = label;
17840 barrier = emit_barrier_after (from);
17841 emit_label_after (label, barrier);
17843 /* Create a minipool barrier entry for the new barrier.  */
17844 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*new_fix));
17845 new_fix->insn = barrier;
17846 new_fix->address = selected_address;
17847 new_fix->next = fix->next;
17848 fix->next = new_fix;
17853 /* Record that there is a natural barrier in the insn stream at
17856 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17858 Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));
17861 fix->address = address;
17864 if (minipool_fix_head != NULL)
17865 minipool_fix_tail->next = fix;
17867 minipool_fix_head = fix;
17869 minipool_fix_tail = fix;
17872 /* Record INSN, which will need fixing up to load a value from the
17873 minipool.  ADDRESS is the offset of the insn since the start of the
17874 function; LOC is a pointer to the part of the insn which requires
17875 fixing; VALUE is the constant that must be loaded, which is of type
17878 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17879 machine_mode mode, rtx value)
17881 gcc_assert (!arm_disable_literal_pool);
17882 Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));
17885 fix->address = address;
17888 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17889 fix->value = value;
17890 fix->forwards = get_attr_pool_range (insn);
17891 fix->backwards = get_attr_neg_pool_range (insn);
17892 fix->minipool = NULL;
17894 /* If an insn doesn't have a range defined for it, then it isn't
17895 expecting to be reworked by this code.  Better to stop now than
17896 to generate duff assembly code.  */
17897 gcc_assert (fix->forwards || fix->backwards);
17899 /* If an entry requires 8-byte alignment then assume all constant pools
17900 require 4 bytes of padding.  Trying to do this later on a per-pool
17901 basis is awkward because existing pool entries have to be modified.  */
17902 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17907 fprintf (dump_file,
17908 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17909 GET_MODE_NAME (mode),
17910 INSN_UID (insn), (unsigned long) address,
17911 -1 * (long) fix->backwards, (long) fix->forwards);
17912 arm_print_value (dump_file, fix->value);
17913 fprintf (dump_file, "\n");
17916 /* Add it to the chain of fixes.  */
17919 if (minipool_fix_head != NULL)
17920 minipool_fix_tail->next = fix;
17922 minipool_fix_head = fix;
17924 minipool_fix_tail = fix;
17927 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17928 Returns the number of insns needed, or 99 if we always want to synthesize
17931 arm_max_const_double_inline_cost ()
17933 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17936 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17937 Returns the number of insns needed, or 99 if we don't know how to
17940 arm_const_double_inline_cost (rtx val)
17942 rtx lowpart, highpart;
17945 mode = GET_MODE (val);
17947 if (mode == VOIDmode)
17950 gcc_assert (GET_MODE_SIZE (mode) == 8);
17952 lowpart = gen_lowpart (SImode, val);
17953 highpart = gen_highpart_mode (SImode, mode, val);
17955 gcc_assert (CONST_INT_P (lowpart));
17956 gcc_assert (CONST_INT_P (highpart));
17958 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17959 NULL_RTX, NULL_RTX, 0, 0)
17960 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17961 NULL_RTX, NULL_RTX, 0, 0));
17964 /* Cost of loading a SImode constant.  */
17966 arm_const_inline_cost (enum rtx_code code, rtx val)
17968 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17969 NULL_RTX, NULL_RTX, 1, 0);
17972 /* Return true if it is worthwhile to split a 64-bit constant into two
17973 32-bit operations.  This is the case if optimizing for size, or
17974 if we have load delay slots, or if one 32-bit part can be done with
17975 a single data operation.  */
17977 arm_const_double_by_parts (rtx val)
17979 machine_mode mode = GET_MODE (val);
17982 if (optimize_size || arm_ld_sched)
17985 if (mode == VOIDmode)
17988 part = gen_highpart_mode (SImode, mode, val);
17990 gcc_assert (CONST_INT_P (part));
17992 if (const_ok_for_arm (INTVAL (part))
17993 || const_ok_for_arm (~INTVAL (part)))
17996 part = gen_lowpart (SImode, val);
17998 gcc_assert (CONST_INT_P (part));
18000 if (const_ok_for_arm (INTVAL (part))
18001 || const_ok_for_arm (~INTVAL (part)))
18007 /* Return true if it is possible to inline both the high and low parts
18008 of a 64-bit constant into 32-bit data processing instructions.  */
18010 arm_const_double_by_immediates (rtx val)
18012 machine_mode mode = GET_MODE (val);
18015 if (mode == VOIDmode)
18018 part = gen_highpart_mode (SImode, mode, val);
18020 gcc_assert (CONST_INT_P (part));
18022 if (!const_ok_for_arm (INTVAL (part)))
18025 part = gen_lowpart (SImode, val);
18027 gcc_assert (CONST_INT_P (part));
18029 if (!const_ok_for_arm (INTVAL (part)))
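/* Illustrative values (not from the original source): an ARM
   data-processing immediate is an 8-bit quantity rotated right by an
   even amount, so a 64-bit constant with high part 0x104 (0x41 rotated)
   and low part 0xff has both halves encodable and this returns true,
   whereas a part such as 0x12345678 cannot be encoded in one
   instruction and forces a return of false.  */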
18035 /* Scan INSN and note any of its operands that need fixing.
18036 If DO_PUSHES is false we do not actually push any of the fixups
18039 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18043 extract_constrain_insn (insn);
18045 if (recog_data.n_alternatives == 0)
18048 /* Fill in recog_op_alt with information about the constraints of
18050 preprocess_constraints (insn);
18052 const operand_alternative *op_alt = which_op_alt ();
18053 for (opno = 0; opno < recog_data.n_operands; opno++)
18055 /* Things we need to fix can only occur in inputs.  */
18056 if (recog_data.operand_type[opno] != OP_IN)
18059 /* If this alternative is a memory reference, then any mention
18060 of constants in this alternative is really to fool reload
18061 into allowing us to accept one there.  We need to fix them up
18062 now so that we output the right code.  */
18063 if (op_alt[opno].memory_ok)
18065 rtx op = recog_data.operand[opno];
18067 if (CONSTANT_P (op))
18070 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18071 recog_data.operand_mode[opno], op);
18073 else if (MEM_P (op)
18074 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18075 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18079 rtx cop = avoid_constant_pool_reference (op);
18081 /* Casting the address of something to a mode narrower
18082 than a word can cause avoid_constant_pool_reference()
18083 to return the pool reference itself.  That's no good to
18084 us here.  Let's just hope that we can use the
18085 constant pool value directly.  */
18087 cop = get_pool_constant (XEXP (op, 0));
18089 push_minipool_fix (insn, address,
18090 recog_data.operand_loc[opno],
18091 recog_data.operand_mode[opno], cop);
18101 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18102 and unions in the context of ARMv8-M Security Extensions.  It is used as a
18103 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18104 functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18105 or four masks, depending on whether it is being computed for a
18106 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18107 respectively.  The tree for the type of the argument or a field within an
18108 argument is passed in ARG_TYPE, the current register this argument or field
18109 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18110 argument or field starts at is passed in STARTING_BIT and the last used bit
18111 is kept in LAST_USED_BIT, which is also updated accordingly.  */
18113 static unsigned HOST_WIDE_INT
18114 comp_not_to_clear_mask_str_un (tree arg_type, int *regno,
18115 uint32_t *padding_bits_to_clear,
18116 unsigned starting_bit, int *last_used_bit)
18119 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18121 if (TREE_CODE (arg_type) == RECORD_TYPE)
18123 unsigned current_bit = starting_bit;
18125 long int offset, size;
18128 field = TYPE_FIELDS (arg_type);
18131 /* The offset within a structure is always an offset from
18132 the start of that structure.  Make sure we take that into the
18133 calculation of the register based offset that we use here.  */
18134 offset = starting_bit;
18135 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18138 /* This is the actual size of the field, for bitfields this is the
18139 bitfield width and not the container size.  */
18140 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18142 if (*last_used_bit != offset)
18144 if (offset < *last_used_bit)
18146 /* This field's offset is before the 'last_used_bit', that
18147 means this field goes on the next register.  So we need to
18148 pad the rest of the current register and increase the
18149 register number.  */
18151 mask = ((uint32_t) -1) - ((uint32_t) 1 << *last_used_bit);
18154 padding_bits_to_clear[*regno] |= mask;
18155 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18160 /* Otherwise we pad the bits between the last field's end and
18161 the start of the new field.  */
18164 mask = ((uint32_t) -1) >> (32 - offset);
18165 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18166 padding_bits_to_clear[*regno] |= mask;
18168 current_bit = offset;
18171 /* Calculate further padding bits for inner structs/unions too.  */
18172 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18174 *last_used_bit = current_bit;
18175 not_to_clear_reg_mask
18176 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18177 padding_bits_to_clear, offset,
18182 /* Update 'current_bit' with this field's size.  If the
18183 'current_bit' lies in a subsequent register, update 'regno' and
18184 reset 'current_bit' to point to the current bit in that new
18186 current_bit += size;
18187 while (current_bit >= 32)
18190 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18193 *last_used_bit = current_bit;
18196 field = TREE_CHAIN (field);
18198 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18200 else if (TREE_CODE (arg_type) == UNION_TYPE)
18202 tree field, field_t;
18203 int i, regno_t, field_size;
18207 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18208 = {-1, -1, -1, -1};
18210 /* To compute the padding bits in a union we only consider bits as
18211 padding bits if they are always either a padding bit or fall outside a
18212 field's size for all fields in the union.  */
18213 field = TYPE_FIELDS (arg_type);
18216 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18217 = {0U, 0U, 0U, 0U};
18218 int last_used_bit_t = *last_used_bit;
18220 field_t = TREE_TYPE (field);
18222 /* If the field's type is either a record or a union make sure to
18223 compute their padding bits too.  */
18224 if (RECORD_OR_UNION_TYPE_P (field_t))
18225 not_to_clear_reg_mask
18226 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18227 &padding_bits_to_clear_t[0],
18228 starting_bit, &last_used_bit_t);
18231 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18232 regno_t = (field_size / 32) + *regno;
18233 last_used_bit_t = (starting_bit + field_size) % 32;
18236 for (i = *regno; i < regno_t; i++)
18238 /* For all but the last register used by this field only keep the
18239 padding bits that were padding bits in this field.  */
18240 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18243 /* For the last register, keep all padding bits that were padding
18244 bits in this field and any padding bits that are still valid
18245 as padding bits but fall outside of this field's size.  */
18246 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18247 padding_bits_to_clear_res[regno_t]
18248 &= padding_bits_to_clear_t[regno_t] | mask;
18250 /* Update the maximum size of the fields in terms of registers used
18251 ('max_reg') and the 'last_used_bit' in said register.  */
18252 if (max_reg < regno_t)
18255 max_bit = last_used_bit_t;
18257 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18258 max_bit = last_used_bit_t;
18260 field = TREE_CHAIN (field);
18263 /* Update the current padding_bits_to_clear using the intersection of the
18264 padding bits of all the fields.  */
18265 for (i = *regno; i < max_reg; i++)
18266 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18268 /* Do not keep trailing padding bits, we do not know yet whether this
18269 is the end of the argument.  */
18270 mask = ((uint32_t) 1 << max_bit) - 1;
18271 padding_bits_to_clear[max_reg]
18272 |= padding_bits_to_clear_res[max_reg] & mask;
18275 *last_used_bit = max_bit;
18278 /* This function should only be used for structs and unions.  */
18279 gcc_unreachable ();
18281 return not_to_clear_reg_mask;
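/* Worked example (illustrative): for

     struct { char c; short s; }

   passed in r0, 'c' occupies bits 0-7 and 's' starts at bit 16, so the
   gap yields mask == 0xffff - 0xff == 0xff00: padding_bits_to_clear[0]
   gets 0x0000ff00 and bit 0 of the returned mask marks r0 as live.  */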
18284 /* In the context of ARMv8-M Security Extensions, this function is used for both
18285 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18286 registers are used when returning or passing arguments, which is then
18287 returned as a mask.  It will also compute a mask to indicate padding/unused
18288 bits for each of these registers, and passes this through the
18289 PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
18290 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18291 the starting register used to pass this argument or return value is passed
18292 in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18293 for struct and union types.  */
18295 static unsigned HOST_WIDE_INT
18296 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18297 uint32_t *padding_bits_to_clear)
18300 int last_used_bit = 0;
18301 unsigned HOST_WIDE_INT not_to_clear_mask;
18303 if (RECORD_OR_UNION_TYPE_P (arg_type))
18306 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18307 padding_bits_to_clear, 0,
18311 /* If the 'last_used_bit' is not zero, that means we are still using a
18312 part of the last 'regno'.  In such cases we must clear the trailing
18313 bits.  Otherwise we are not using regno and we should mark it as to
18315 if (last_used_bit != 0)
18316 padding_bits_to_clear[regno]
18317 |= ((uint32_t) -1) - ((uint32_t) 1 << last_used_bit) + 1;
18319 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18323 not_to_clear_mask = 0;
18324 /* We are not dealing with structs nor unions.  So these arguments may be
18325 passed in floating point registers too.  In some cases a BLKmode is
18326 used when returning or passing arguments in multiple VFP registers.  */
18327 if (GET_MODE (arg_rtx) == BLKmode)
18332 /* This should really only occur when dealing with the hard-float
18334 gcc_assert (TARGET_HARD_FLOAT_ABI);
18336 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18338 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18339 gcc_assert (REG_P (reg));
18341 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18343 /* If we are dealing with DF mode, make sure we don't
18344 clear either of the registers it addresses.  */
18345 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18348 unsigned HOST_WIDE_INT mask;
18349 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18350 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18351 not_to_clear_mask |= mask;
18357 /* Otherwise we can rely on the MODE to determine how many registers
18358 are being used by this argument.  */
18359 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18360 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18363 unsigned HOST_WIDE_INT
18364 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18365 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18366 not_to_clear_mask |= mask;
18371 return not_to_clear_mask;
18374 /* Clear the registers holding secrets before doing a cmse_nonsecure_call or
18375 returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates which registers
18376 are to be fully cleared, using the value in register CLEARING_REG if more
18377 efficient.  The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18378 the bits that need to be cleared in caller-saved core registers, with
18379 SCRATCH_REG used as a scratch register for that clearing.
18381 NOTE: one of the three following assertions must hold:
18382 - SCRATCH_REG is a low register
18383 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18384 in TO_CLEAR_BITMAP)
18385 - CLEARING_REG is a low register.  */
18388 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18389 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18391 bool saved_clearing = false;
18392 rtx saved_clearing_reg = NULL_RTX;
18393 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18395 gcc_assert (arm_arch_cmse);
18397 if (!bitmap_empty_p (to_clear_bitmap))
18399 minregno = bitmap_first_set_bit (to_clear_bitmap);
18400 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18402 clearing_regno = REGNO (clearing_reg);
18404 /* Clear padding bits.  */
18405 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18406 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18409 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18411 if (padding_bits_to_clear[i] == 0)
18414 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18415 CLEARING_REG as scratch.  */
18417 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18419 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18420 such that we can use clearing_reg to clear the unused bits in the
18422 if ((clearing_regno > maxregno
18423 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18424 && !saved_clearing)
18426 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18427 emit_move_insn (scratch_reg, clearing_reg);
18428 saved_clearing = true;
18429 saved_clearing_reg = scratch_reg;
18431 scratch_reg = clearing_reg;
18434 /* Fill the lower half of the negated padding_bits_to_clear[i].  */
18435 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18436 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18438 /* Fill the top half of the negated padding_bits_to_clear[i].  */
18439 mask = (~padding_bits_to_clear[i]) >> 16;
18440 rtx16 = gen_int_mode (16, SImode);
18441 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18443 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18445 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18447 if (saved_clearing)
18448 emit_move_insn (clearing_reg, saved_clearing_reg);
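/* On targets with movw/movt, the two constant stores above typically
   assemble to (illustrative sketch):

     movw scratch, #lo16(~padding_bits_to_clear[i])
     movt scratch, #hi16(~padding_bits_to_clear[i])
     and  rN, rN, scratch

   The ZERO_EXTRACT set writes only the top halfword, leaving the
   movw-loaded lower half intact.  */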
18451 /* Clear full registers.  */
18453 if (TARGET_HAVE_FPCXT_CMSE)
18456 int i, j, k, nb_regs;
18457 rtx use_seq, par, reg, set, vunspec;
18458 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18459 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18460 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18462 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18464 /* Find next register to clear and exit if none.  */
18465 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18469 /* Compute number of consecutive registers to clear.  */
18470 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18474 /* Create VSCCLRM RTX pattern.  */
18475 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18476 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18477 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18478 VUNSPEC_VSCCLRM_VPR);
18479 XVECEXP (par, 0, 0) = vunspec;
18481 /* Insert VFP register clearing RTX in the pattern.  */
18483 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18485 if (!bitmap_bit_p (to_clear_bitmap, j))
18488 reg = gen_rtx_REG (SFmode, j);
18489 set = gen_rtx_SET (reg, const0_rtx);
18490 XVECEXP (par, 0, k++) = set;
18493 use_seq = get_insns ();
18496 emit_insn_after (use_seq, emit_insn (par));
18499 /* Get set of core registers to clear.  */
18500 bitmap_clear (core_regs_bitmap);
18501 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18502 IP_REGNUM - R0_REGNUM + 1);
18503 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18505 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18507 if (bitmap_empty_p (to_clear_core_bitmap))
18510 /* Create clrm RTX pattern.  */
18511 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18512 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18514 /* Insert core register clearing RTX in the pattern.  */
18516 for (j = 0, i = minregno; j < nb_regs; i++)
18518 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18521 reg = gen_rtx_REG (SImode, i);
18522 set = gen_rtx_SET (reg, const0_rtx);
18523 XVECEXP (par, 0, j++) = set;
18527 /* Insert APSR register clearing RTX in the pattern
18528 along with clobbering CC.  */
18529 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18530 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18531 VUNSPEC_CLRM_APSR);
18533 XVECEXP (par, 0, j++) = vunspec;
18535 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18536 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18537 XVECEXP (par, 0, j) = clobber;
18539 use_seq = get_insns ();
18542 emit_insn_after (use_seq, emit_insn (par));
18546 /* If not marked for clearing, clearing_reg already does not contain
18548 if (clearing_regno <= maxregno
18549 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18551 emit_move_insn (clearing_reg, const0_rtx);
18552 emit_use (clearing_reg);
18553 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18556 for (regno = minregno; regno <= maxregno; regno++)
18558 if (!bitmap_bit_p (to_clear_bitmap, regno))
18561 if (IS_VFP_REGNUM (regno))
18563 /* If regno is an even vfp register and its successor is also to
18564 be cleared, use vmov.  */
18565 if (TARGET_VFP_DOUBLE
18566 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18567 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18569 emit_move_insn (gen_rtx_REG (DFmode, regno),
18570 CONST1_RTX (DFmode));
18571 emit_use (gen_rtx_REG (DFmode, regno));
18576 emit_move_insn (gen_rtx_REG (SFmode, regno),
18577 CONST1_RTX (SFmode));
18578 emit_use (gen_rtx_REG (SFmode, regno));
18583 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18584 emit_use (gen_rtx_REG (SImode, regno));
18590 /* Clear core and caller-saved VFP registers not used to pass arguments before
18591 a cmse_nonsecure_call.  Saving, clearing and restoring of VFP callee-saved
18592 registers is done in the __gnu_cmse_nonsecure_call libcall.  See
18593 libgcc/config/arm/cmse_nonsecure_call.S.  */
18596 cmse_nonsecure_call_inline_register_clear (void)
18600 FOR_EACH_BB_FN (bb, cfun)
18604 FOR_BB_INSNS (bb, insn)
18606 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18607 /* frame = VFP regs + FPSCR + VPR.  */
18608 unsigned lazy_store_stack_frame_size
18609 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18610 unsigned long callee_saved_mask
18611 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18612 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18613 unsigned address_regnum, regno;
18614 unsigned max_int_regno
18615 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18616 unsigned max_fp_regno
18617 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18619 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18620 auto_sbitmap to_clear_bitmap (maxregno + 1);
18622 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18624 CUMULATIVE_ARGS args_so_far_v;
18625 cumulative_args_t args_so_far;
18626 tree arg_type, fntype;
18627 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18628 function_args_iterator args_iter;
18629 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18631 if (!NONDEBUG_INSN_P (insn))
18634 if (!CALL_P (insn))
18637 pat = PATTERN (insn);
18638 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18639 call = XVECEXP (pat, 0, 0);
18641 /* Get the real call RTX if the insn sets a value, i.e. returns.  */
18642 if (GET_CODE (call) == SET)
18643 call = SET_SRC (call);
18645 /* Check if it is a cmse_nonsecure_call.  */
18646 unspec = XEXP (call, 0);
18647 if (GET_CODE (unspec) != UNSPEC
18648 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18651 /* Mark registers that need to be cleared.  Those that hold a
18652 parameter are removed from the set further below.  */
18653 bitmap_clear (to_clear_bitmap);
18654 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18655 max_int_regno - R0_REGNUM + 1);
18657 /* Only look at the caller-saved floating point registers in case of
18658 -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
18659 lazy store and loads which clear both caller- and callee-saved
18663 auto_sbitmap float_bitmap (maxregno + 1);
18665 bitmap_clear (float_bitmap);
18666 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18667 max_fp_regno - FIRST_VFP_REGNUM + 1);
18668 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18671 /* Make sure the register used to hold the function address is not
18673 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18674 gcc_assert (MEM_P (address));
18675 gcc_assert (REG_P (XEXP (address, 0)));
18676 address_regnum = REGNO (XEXP (address, 0));
18677 if (address_regnum <= max_int_regno)
18678 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18680 /* Set basic block of call insn so that df rescan is performed on
18681 insns inserted here.  */
18682 set_block_for_insn (insn, bb);
18683 df_set_flags (DF_DEFER_INSN_RESCAN);
18686 /* Make sure the scheduler doesn't schedule other insns beyond
18688 emit_insn (gen_blockage ());
18690 /* Walk through all arguments and clear registers appropriately.
18692 fntype = TREE_TYPE (MEM_EXPR (address));
18693 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18695 args_so_far = pack_cumulative_args (&args_so_far_v);
18696 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18699 uint64_t to_clear_args_mask;
18701 if (VOID_TYPE_P (arg_type))
18704 function_arg_info arg (arg_type, /*named=*/true);
18706 /* ??? We should advance after processing the argument and pass
18707 the argument we're advancing past.  */
18708 arm_function_arg_advance (args_so_far, arg);
18710 arg_rtx = arm_function_arg (args_so_far, arg);
18711 gcc_assert (REG_P (arg_rtx));
18713 = compute_not_to_clear_mask (arg_type, arg_rtx,
18715 &padding_bits_to_clear[0]);
18716 if (to_clear_args_mask)
18718 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18720 if (to_clear_args_mask & (1ULL << regno))
18721 bitmap_clear_bit (to_clear_bitmap, regno);
18725 first_param = false;
18728 /* We use right shift and left shift to clear the LSB of the address
18729 we jump to instead of using bic, to avoid having to use an extra
18730 register on Thumb-1.  */
18731 clearing_reg = XEXP (address, 0);
18732 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18733 emit_insn (gen_rtx_SET (clearing_reg, shift));
18734 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18735 emit_insn (gen_rtx_SET (clearing_reg, shift));
18737 if (clear_callee_saved
)
18740 emit_multi_reg_push (callee_saved_mask
, callee_saved_mask
);
18741 /* Disable frame debug info in push because it needs to be
18742 disabled for pop (see below). */
18743 RTX_FRAME_RELATED_P (push_insn
) = 0;
18745 /* Lazy store multiple. */
18749 rtx_insn
*add_insn
;
18751 imm
= gen_int_mode (- lazy_store_stack_frame_size
, SImode
);
18752 add_insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
18753 stack_pointer_rtx
, imm
));
18754 arm_add_cfa_adjust_cfa_note (add_insn
,
18755 - lazy_store_stack_frame_size
,
18757 stack_pointer_rtx
);
18758 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx
));
18760 /* Save VFP callee-saved registers. */
18763 vfp_emit_fstmd (D7_VFP_REGNUM
+ 1,
18764 (max_fp_regno
- D7_VFP_REGNUM
) / 2);
18765 /* Disable frame debug info in push because it needs to be
18766 disabled for vpop (see below). */
18767 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18771 /* Clear caller-saved registers that leak before doing a non-secure
18773 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
18774 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
18775 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
18777 seq
= get_insns ();
18779 emit_insn_before (seq
, insn
);
18781 if (TARGET_HAVE_FPCXT_CMSE
)
18783 rtx_insn
*last
, *pop_insn
, *after
= insn
;
18787 /* Lazy load multiple done as part of libcall in Armv8-M. */
18790 rtx imm
= gen_int_mode (lazy_store_stack_frame_size
, SImode
);
18791 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx
));
18792 rtx_insn
*add_insn
=
18793 emit_insn (gen_addsi3 (stack_pointer_rtx
,
18794 stack_pointer_rtx
, imm
));
18795 arm_add_cfa_adjust_cfa_note (add_insn
,
18796 lazy_store_stack_frame_size
,
18798 stack_pointer_rtx
);
18800 /* Restore VFP callee-saved registers. */
18803 int nb_callee_saved_vfp_regs
=
18804 (max_fp_regno
- D7_VFP_REGNUM
) / 2;
18805 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM
+ 1,
18806 nb_callee_saved_vfp_regs
,
18807 stack_pointer_rtx
);
18808 /* Disable frame debug info in vpop because the SP adjustment
18809 is made using a CFA adjustment note while CFA used is
18810 sometimes R7. This then causes an assert failure in the
18811 CFI note creation code. */
18812 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18815 arm_emit_multi_reg_pop (callee_saved_mask
);
18816 pop_insn
= get_last_insn ();
18818 /* Disable frame debug info in pop because they reset the state
18819 of popped registers to what it was at the beginning of the
18820 function, before the prologue. This leads to incorrect state
18821 when doing the pop after the nonsecure call for registers that
18822 are pushed both in prologue and before the nonsecure call.
18824 It also occasionally triggers an assert failure in CFI note
18825 creation code when there are two codepaths to the epilogue,
18826 one of which does not go through the nonsecure call.
18827 Obviously this mean that debugging between the push and pop is
18829 RTX_FRAME_RELATED_P (pop_insn
) = 0;
18831 seq
= get_insns ();
18832 last
= get_last_insn ();
18835 emit_insn_after (seq
, after
);
18837 /* Skip pop we have just inserted after nonsecure call, we know
18838 it does not contain a nonsecure call. */
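/* Rough shape of the code inserted around a nonsecure call on a
   TARGET_HAVE_FPCXT_CMSE target with -mfloat-abi=softfp (a sketch; the
   exact registers and offsets depend on the call signature):

        push    {r4-r11}        @ callee-saved core registers
        sub     sp, #136        @ room for the lazy FP save area
        vlstm   sp              @ lazy-save FP context
        lsrs    r4, r4, #1      @ clear LSB of the target address
        lsls    r4, r4, #1
        ...                     @ clear argument/caller-saved registers
        ...                     @ the UNSPEC_NONSECURE_MEM call itself
        vlldm   sp              @ lazy-restore FP context
        add     sp, #136
        pop     {r4-r11}

   136 here is lazy_store_stack_frame_size: 32 S registers plus FPSCR and
   VPR, each a word.  */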
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
        insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
          || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
        continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
        continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
           (!insn_clobbered
            && prev != BB_HEAD (bb)
            && (NOTE_P (prev)
                || DEBUG_INSN_P (prev)
                || ((set = single_set (prev)) != NULL
                    && get_attr_conds (prev) == CONDS_NOCOND)));
           prev = PREV_INSN (prev))
        {
          if (reg_set_p (op0, prev))
            insn_clobbered = true;
        }

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
        continue;

      if (!set)
        continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
          || !low_register_operand (src, SImode))
        continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
         in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
        {
          dest = copy_rtx (dest);
          src = copy_rtx (src);
          src = gen_rtx_MINUS (SImode, src, const0_rtx);
          PATTERN (prev) = gen_rtx_SET (dest, src);
          INSN_CODE (prev) = -1;
          /* Set test register in INSN to dest.  */
          XEXP (cmp, 0) = copy_rtx (dest);
          INSN_CODE (insn) = -1;
        }
    }
}
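/* Worked example of the transformation above (schematic): for Thumb-1 code
   such as

        movs    r3, r2
        ...                     @ insns that do not set the flags
        cmp     r3, #0
        beq     .L1

   the move is rewritten as "subs r3, r2, #0", which sets the condition
   codes itself, so the compare against zero becomes redundant and the
   conditional branch can use the flags directly.  */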
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
           == tune_params::DISPARAGE_FLAGS_ALL)
          && optimize_bb_for_speed_p (bb))
        continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
        = ((current_tune->disparage_flag_setting_t16_encodings
            != tune_params::DISPARAGE_FLAGS_NEITHER)
           && optimize_bb_for_speed_p (bb))
          ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
        {
          if (NONJUMP_INSN_P (insn)
              && !REGNO_REG_SET_P (&live, CC_REGNUM)
              && GET_CODE (PATTERN (insn)) == SET)
            {
              action = SKIP;
              rtx pat = PATTERN (insn);
              rtx dst = XEXP (pat, 0);
              rtx src = XEXP (pat, 1);
              rtx op0 = NULL_RTX, op1 = NULL_RTX;

              if (UNARY_P (src) || BINARY_P (src))
                op0 = XEXP (src, 0);

              if (BINARY_P (src))
                op1 = XEXP (src, 1);

              if (low_register_operand (dst, SImode))
                {
                  switch (GET_CODE (src))
                    {
                    case PLUS:
                      /* Adding two registers and storing the result
                         in the first source is already a 16-bit
                         operation.  */
                      if (rtx_equal_p (dst, op0)
                          && register_operand (op1, SImode))
                        break;

                      if (low_register_operand (op0, SImode))
                        {
                          /* ADDS <Rd>,<Rn>,<Rm>  */
                          if (low_register_operand (op1, SImode))
                            action = CONV;
                          /* ADDS <Rdn>,#<imm8>  */
                          /* SUBS <Rdn>,#<imm8>  */
                          else if (rtx_equal_p (dst, op0)
                                   && CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -255, 255))
                            action = CONV;
                          /* ADDS <Rd>,<Rn>,#<imm3>  */
                          /* SUBS <Rd>,<Rn>,#<imm3>  */
                          else if (CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -7, 7))
                            action = CONV;
                        }
                      /* ADCS <Rd>, <Rn>  */
                      else if (GET_CODE (XEXP (src, 0)) == PLUS
                               && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
                               && low_register_operand (XEXP (XEXP (src, 0), 1),
                                                        SImode)
                               && COMPARISON_P (op1)
                               && cc_register (XEXP (op1, 0), VOIDmode)
                               && maybe_get_arm_condition_code (op1) == ARM_CS
                               && XEXP (op1, 1) == const0_rtx)
                        action = CONV;
                      break;

                    case MINUS:
                      /* RSBS <Rd>,<Rn>,#0
                         Not handled here: see NEG below.  */
                      /* SUBS <Rd>,<Rn>,#<imm3>
                         SUBS <Rdn>,#<imm8>
                         Not handled here: see PLUS above.  */
                      /* SUBS <Rd>,<Rn>,<Rm>  */
                      if (low_register_operand (op0, SImode)
                          && low_register_operand (op1, SImode))
                        action = CONV;
                      break;

                    case MULT:
                      /* MULS <Rdm>,<Rn>,<Rdm>
                         As an exception to the rule, this is only used
                         when optimizing for size since MULS is slow on all
                         known implementations.  We do not even want to use
                         MULS in cold code, if optimizing for speed, so we
                         test the global flag here.  */
                      if (!optimize_size)
                        break;
                      /* Fall through.  */
                    case AND:
                    case IOR:
                    case XOR:
                      /* ANDS <Rdn>,<Rm>  */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      else if (rtx_equal_p (dst, op1)
                               && low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting == SKIP
                                 ? SKIP : SWAP_CONV;
                      break;

                    case ASHIFTRT:
                    case ASHIFT:
                    case LSHIFTRT:
                      /* ASRS <Rdn>,<Rm> */
                      /* LSRS <Rdn>,<Rm> */
                      /* LSLS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      /* ASRS <Rd>,<Rm>,#<imm5> */
                      /* LSRS <Rd>,<Rm>,#<imm5> */
                      /* LSLS <Rd>,<Rm>,#<imm5> */
                      else if (low_register_operand (op0, SImode)
                               && CONST_INT_P (op1)
                               && IN_RANGE (INTVAL (op1), 0, 31))
                        action = action_for_partial_flag_setting;
                      break;

                    case ROTATERT:
                      /* RORS <Rdn>,<Rm>  */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      break;

                    case NOT:
                      /* MVNS <Rd>,<Rm>  */
                      if (low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting;
                      break;

                    case NEG:
                      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
                      if (low_register_operand (op0, SImode))
                        action = CONV;
                      break;

                    case CONST_INT:
                      /* MOVS <Rd>,#<imm8>  */
                      if (CONST_INT_P (src)
                          && IN_RANGE (INTVAL (src), 0, 255))
                        action = action_for_partial_flag_setting;
                      break;

                    case REG:
                      /* MOVS and MOV<c> with registers have different
                         encodings, so are not relevant here.  */
                      break;

                    default:
                      break;
                    }
                }

              if (action != SKIP)
                {
                  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
                  rtvec vec;

                  if (action == SWAP_CONV)
                    {
                      src = copy_rtx (src);
                      XEXP (src, 0) = op1;
                      XEXP (src, 1) = op0;
                      pat = gen_rtx_SET (dst, src);
                      vec = gen_rtvec (2, pat, clobber);
                    }
                  else /* action == CONV */
                    vec = gen_rtvec (2, pat, clobber);

                  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                  INSN_CODE (insn) = -1;
                }
            }

          if (NONDEBUG_INSN_P (insn))
            df_simulate_one_insn_backwards (bb, insn, &live);
        }
    }

  CLEAR_REG_SET (&live);
}
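/* Example of the effect (schematic): with the CC register dead, a 32-bit
   Thumb-2 instruction such as

        add     r0, r0, r1      @ 32-bit encoding, flags preserved

   is converted into its flag-setting form

        adds    r0, r0, r1      @ 16-bit encoding, clobbers the flags

   which is why the PARALLEL built above pairs the original SET with a
   clobber of CC_REGNUM.  */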
/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_inline_register_clear ();

  /* We cannot run the Thumb passes for thunks because there is no CFG.  */
  if (cfun->is_thunk)
    ;
  else if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it should
     no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
        push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
        {
          rtx_jump_table_data *table;

          note_invalid_constants (insn, address, true);
          address += get_attr_length (insn);

          /* If the insn is a vector jump, add the size of the table
             and skip the table.  */
          if (tablejump_p (insn, NULL, &table))
            {
              address += get_jump_table_size (table);
              insn = table;
            }
        }
      else if (LABEL_P (insn))
        /* Add the worst-case padding due to alignment.  We don't add
           the _current_ padding because the minipool insertions
           themselves might change it.  */
        address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
        fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
        break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
        {
          if (BARRIER_P (ftmp->insn))
            {
              if (ftmp->address >= minipool_vector_head->max_address)
                break;

              last_barrier = ftmp;
            }
          else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
            break;

          last_added_fix = ftmp;  /* Keep track of the last fix added.  */
        }

      /* If we found a barrier, drop back to that; any fixes that we
         could have reached but come after the barrier will now go in
         the next mini-pool.  */
      if (last_barrier != NULL)
        {
          /* Reduce the refcount for those fixes that won't go into this
             pool after all.  */
          for (fdel = last_barrier->next;
               fdel && fdel != ftmp;
               fdel = fdel->next)
            {
              fdel->minipool->refcount--;
              fdel->minipool = NULL;
            }

          ftmp = last_barrier;
        }
      else
        {
          /* ftmp is the first fix that we can't fit into this pool and
             there are no natural barriers that we could use.  Insert a
             new barrier in the code somewhere between the previous
             fix and this one, and arrange to jump around it.  */
          HOST_WIDE_INT max_address;

          /* The last item on the list of fixes must be a barrier, so
             we can never run off the end of the list of fixes without
             last_barrier being set.  */
          gcc_assert (ftmp);

          max_address = minipool_vector_head->max_address;
          /* Check that there isn't another fix that is in range that
             we couldn't fit into this pool because the pool was
             already too large: we need to put the pool before such an
             instruction.  The pool itself may come just after the
             fix because create_fix_barrier also allows space for a
             jump instruction.  */
          if (ftmp->address < max_address)
            max_address = ftmp->address + 1;

          last_barrier = create_fix_barrier (last_added_fix, max_address);
        }

      assign_minipool_offsets (last_barrier);

      while (ftmp)
        {
          if (!BARRIER_P (ftmp->insn)
              && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
                  == NULL))
            break;

          ftmp = ftmp->next;
        }

      /* Scan over the fixes we have identified for this pool, fixing them
         up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
           this_fix = this_fix->next)
        if (!BARRIER_P (this_fix->insn))
          {
            rtx addr
              = plus_constant (Pmode,
                               gen_rtx_LABEL_REF (VOIDmode,
                                                  minipool_vector_label),
                               this_fix->minipool->offset);
            *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
          }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
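/* A minipool in the emitted assembly looks roughly like this (sketch):

        ldr     r3, .L5         @ pc-relative load, limited range
        ...
        b       .L6             @ barrier inserted to jump over the pool
   .L5:
        .word   0x12345678      @ the constant, placed within range
   .L6:
        ...

   The fixups recorded above rewrite each offending constant operand into
   a MEM of the corresponding pool entry, as done in the loop over
   THIS_FIX.  */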
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
                         bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
        gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
         It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
        sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
        sprintf (pattern, "ldmia%s\t", conditional);
      else
        sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
        strcat (pattern, "!, {");
      else
        strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
          reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
              reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
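/* Typical outputs of the routine above (illustrative):

        pop     {r4, r5, pc}            @ SP base with update, normal return
        ldmfd   sp!, {r4, r5, pc}^      @ interrupt return; '^' also restores
                                        @ SPSR into CPSR
        ldm     r3, {r0, r1}            @ non-SP base, no explicit update  */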
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
                   ? XEXP (operands[0], 0)
                   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
        base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
         counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
                   (BLKmode,
                    gen_rtx_PRE_MODIFY (Pmode,
                                        stack_pointer_rtx,
                                        plus_constant
                                        (Pmode, stack_pointer_rtx,
                                         - (count * 8)))
                    ),
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
                                        plus_constant (Pmode,
                                                       stack_pointer_rtx,
                                                       i * 8)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
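/* For example (illustrative), a call such as
   vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 2) would push {d8, d9} and return
   16; on pre-v6 cores the ARM10 VFPr1 workaround above widens such a
   two-register push to three.  The DWARF SEQUENCE built in parallel
   describes the same effect as one SP adjustment of count * 8 bytes plus
   one store per D register, so the unwinder never has to interpret
   UNSPEC_PUSH_MULT.  */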
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
                                    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
          : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register (NULL_RTX, false /*compute_now*/);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_FDPIC)
    {
      rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
         linker.  We need to add an IP clobber to allow setting
         TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
         is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }
  else
    {
      for (i = 2; i >= 0; i--)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }

  return "";
}
/* Emit a movw/movt pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
        {
          emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
                                               GEN_INT (16)),
                         GEN_INT ((val >> 16) & 0x0000ffff));
          rtx_insn *insn = get_last_insn ();
          set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
        }
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
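/* For example (schematic), arm_emit_movpair for the constant 0x12345678
   expands to the usual movw/movt idiom:

        movw    rD, #0x5678     @ SET of the low half
        movt    rD, #0x1234     @ ZERO_EXTRACT store to the high half

   and the movt is skipped entirely when the top 16 bits are zero.  */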
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);
      const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
        {
        case REG:
          if (emit)
            {
              if (can_ldrd
                  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
                output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
              else
                output_asm_insn ("ldmia%?\t%m1, %M0", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (can_ldrd);
          if (emit)
            output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (can_ldrd)
                output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
              else
                output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (can_ldrd)
                output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
              else
                output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (can_ldrd);
          if (emit)
            output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          /* Autoincrement addressing modes should never have overlapping
             base and destination registers, and overlapping index registers
             are already prohibited, so this doesn't need to worry about
             fix_cm3_ldrd.  */
          otherops[0] = operands[0];
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
            {
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
                {
                  /* Registers overlap so split out the increment.  */
                  if (emit)
                    {
                      gcc_assert (can_ldrd);
                      output_asm_insn ("add%?\t%1, %1, %2", otherops);
                      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  /* Use a single insn if we can.
                     FIXME: IWMMXT allows offsets larger than ldrd can
                     handle, fix these up with a pair of ldr.  */
                  if (can_ldrd
                      && (TARGET_THUMB2
                          || !CONST_INT_P (otherops[2])
                          || (INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (emit)
                        output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
                    }
                  else
                    {
                      if (emit)
                        {
                          output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
                          output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                        }
                      if (count)
                        *count = 2;
                    }
                }
            }
          else
            {
              /* Use a single insn if we can.
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
                 fix these up with a pair of ldr.  */
              if (can_ldrd
                  && (TARGET_THUMB2
                      || !CONST_INT_P (otherops[2])
                      || (INTVAL (otherops[2]) > -256
                          && INTVAL (otherops[2]) < 256)))
                {
                  if (emit)
                    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          break;

        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditions in the event of writeback of the base register.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (can_ldrd)
                output_asm_insn ("ldrd%?\t%0, [%1]", operands);
              else
                output_asm_insn ("ldmia%?\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;

          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmda%?\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmib%?\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (can_ldrd
                      && (REG_P (otherops[2])
                          || TARGET_THUMB2
                          || (CONST_INT_P (otherops[2])
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0],
                                                   otherops[2]))
                        {
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          std::swap (otherops[1], otherops[2]);
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldrd%?\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (CONST_INT_P (otherops[2]))
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else if (emit)
                    output_asm_insn ("add%?\t%0, %1, %2", otherops);
                }
              else if (emit)
                output_asm_insn ("sub%?\t%0, %1, %2", otherops);

              if (count)
                *count = 2;

              if (can_ldrd)
                return "ldrd%?\t%0, [%1]";

              return "ldmia%?\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
                  || (TARGET_ARM && TARGET_LDRD));

      /* For TARGET_ARM the first source register of an STRD
         must be even.  This is usually the case for double-word
         values but user assembly constraints can force an odd
         starting register.  */
      bool allow_strd = TARGET_LDRD
                        && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%?\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (allow_strd);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%?\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (allow_strd);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than strd can handle,
             fix these up with a pair of str.  */
          if (!TARGET_THUMB2
              && CONST_INT_P (otherops[2])
              && (INTVAL(otherops[2]) <= -256
                  || INTVAL(otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmda%?\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmib%?\t%m0, %M1", operands);
                  return "";
                }
            }
          if (allow_strd
              && (REG_P (otherops[2])
                  || TARGET_THUMB2
                  || (CONST_INT_P (otherops[2])
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
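/* Two representative expansions of the routine above (illustrative):

        ldrd    r0, [r2]        @ even destination register, LDRD usable
        ldr     r1, [r2, #4]    @ otherwise: split into a pair of loads,
        ldr     r0, [r2]        @ ordered to cope with base/dest overlap

   The ordering games (loading %H0 first or last) are exactly what the
   reg_overlap_mentioned_p / reg_mentioned_p checks above decide.  */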
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldmia%?\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldmia%?\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%?\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
            || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
              || mode == SFmode
              || mode == DFmode
              || mode == HImode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : sp ? "32" : "16",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        {
          templ = "v%s1.64\t%%h1, %%A0";
          ops[0] = mem;
        }
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
        {
          if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
            templ = "v%smia%%?\t%%m0, %%h1";
          else
            templ = "v%s1.64\t%%h1, %%A0";

          ops[0] = mem;
          ops[1] = reg;
          break;
        }
      /* Fall through.  */
    case PLUS:
      if (GET_CODE (addr) == PLUS)
        addr = XEXP (addr, 0);
      /* Fall through.  */
    case LABEL_REF:
      {
        int i;
        int overlap = -1;
        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient
               size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
                  sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
                else
                  sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
              sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
            else
              sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
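/* Typical outputs (illustrative): a quad-word load from a plain register
   address becomes "vld1.64 {d0-d1}, [r0]", while an over-large structure
   mode or an MVE target falls back to "vldmia r0, {d0-d7}", and label
   references degenerate to one vldr per D register as in the LABEL_REF
   arm above.  */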
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case E_EImode:
        case E_OImode:
          return 8;
        case E_CImode:
          return 12;
        case E_XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return 0.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      abort();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift amount.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
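/* Example (illustrative): a multiplication by 8 that reaches here as a
   MULT is printed as a shift, so "x * 8" ends up as

        mov     r0, r1, lsl #3

   with *AMOUNTP set to exact_log2 (8) == 3 by the MULT case above.  */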
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
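/* Example (illustrative): for the bytes of "say \"hi\"" followed by a
   newline the routine emits

        .ascii  "say \"hi\"\012"

   escaping quotes and backslashes, printing unprintable host characters
   as three-digit octal escapes, and restarting the directive once a line
   reaches MAX_ASCII_LEN characters.  */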
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7, Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (reg_needs_saving_p (reg))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
               || flag_stack_clash_protection)
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 -r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
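
/* Illustrative sketch, not part of the original file: the 504 test above.
   The budget is 512 bytes minus 8 for the two possible alignment words
   that may or may not be present.  Hypothetical standalone version of the
   same check:  */
#if 0
static int
example_needs_stack_decrement_reg (unsigned int interwork_slot_size,
				   unsigned int frame_size,
				   unsigned int outgoing_args_size)
{
  unsigned int rounded = (frame_size + 3) & ~3u;   /* ROUND_UP_WORD.  */
  return interwork_slot_size + rounded + outgoing_args_size >= 504;
}
#endif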
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_VFP_BASE)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
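
/* Illustrative sketch, not part of the original file: the function above
   sums 8 bytes per live D register, in runs of consecutive registers, and
   bumps a run of exactly two to three on pre-v6 cores (the ARM10 VFPr1
   workaround).  Hypothetical standalone version over a plain bitmask of
   S registers, pairs of which form one D register:  */
#if 0
static int
example_vfp_saved_bytes (unsigned long live_sregs, int is_arch6)
{
  int saved = 0, count = 0;
  for (int regno = 0; regno < 32; regno += 2)
    if (!(live_sregs & (3ul << regno)))
      {
	if (count == 2 && !is_arch6)
	  count++;	/* ARM10 VFPr1 workaround: avoid a 2-register run.  */
	saved += count * 8;
	count = 0;
      }
    else
      count++;		/* One more D register in the current run.  */
  if (count == 2 && !is_arch6)
    count++;
  return saved + count * 8;
}
#endif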
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && !IS_CMSE_ENTRY (func_type)
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5t && TARGET_ARM)
		sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		}
	    }
	  /* For interrupt returns we have to use an LDM rather than
	     a POP so that we can use the exception return variant.  */
	  else if (IS_INTERRUPT (func_type))
	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
	  else
	    sprintf (instr, "pop%s\t{", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  gcc_assert (arm_arch5t || arm_arch4t);
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  if (IS_CMSE_ENTRY (func_type))
	    {
	      /* For Armv8.1-M, this is cleared as part of the CLRM instruction
		 emitted by cmse_nonsecure_entry_clear_before_return () and the
		 VSTR/VLDR instructions in the prologue and epilogue.  */
	      if (!TARGET_HAVE_FPCXT_CMSE)
		{
		  /* Check if we have to clear the 'GE bits' which is only used if
		     parallel add and subtraction instructions are available.  */
		  if (TARGET_INT_SIMD)
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
		  else
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvq, %%|lr", conditional);

		  output_asm_insn (instr, & operand);
		  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
		     care of it.  */
		  if (TARGET_HARD_FLOAT)
		    {
		      /* Clear the cumulative exception-status bits (0-4,7) and
			 the condition code bits (28-31) of the FPSCR.  We need
			 to remember to clear the first scratch register used
			 (IP) and save and restore the second (r4).

			 Important note: the length of the
			 thumb2_cmse_entry_return insn pattern must account for
			 the size of the below instructions.  */
		      output_asm_insn ("push\t{%|r4}", & operand);
		      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
		      output_asm_insn ("movw\t%|r4, #65376", & operand);
		      output_asm_insn ("movt\t%|r4, #4095", & operand);
		      output_asm_insn ("and\t%|ip, %|r4", & operand);
		      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
		      output_asm_insn ("pop\t{%|r4}", & operand);
		      output_asm_insn ("mov\t%|ip, %|lr", & operand);
		    }
		}
	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
	    }
	  /* Use bx if it's available.  */
	  else if (arm_arch5t || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
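
/* Illustrative sketch, not part of the original file: the epilogue above
   assembles the "pop{...}" operand list by hand, inserting ", " separators
   and the "%|" register-name prefix.  Hypothetical standalone rendering of
   the same string-building loop:  */
#if 0
static void
example_build_pop_insn (char *buf, size_t len, unsigned long mask,
			const char *const names[16])
{
  size_t n = snprintf (buf, len, "pop\t{");
  int first = 1;
  for (int reg = 0; reg <= 13; reg++)	/* r0 .. SP, as in the loop above.  */
    if (mask & (1ul << reg))
      {
	n += snprintf (buf + n, len - n, "%s%%|%s",
		       first ? "" : ", ", names[reg]);
	first = 0;
      }
  snprintf (buf + n, len - n, "}");
}
#endif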
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
				    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */

void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
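
/* Illustrative sketch, not part of the original file: how a backtracer
   would consume the marker emitted above.  The word at pc - 12 carries
   0xff000000 plus the padded name length, so the name ends immediately
   before that word.  Hypothetical reader, assuming pc is a pointer to
   unsigned long as stated above:  */
#if 0
static const char *
example_read_poked_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];	/* The word at pc - 12.  */
  if ((marker & 0xff000000) != 0xff000000)
    return 0;				/* Top 8 bits not set: no name.  */
  unsigned long len = marker & 0x00ffffff;   /* Padded name length.  */
  return (const char *) (pc - 3) - len;	/* Name precedes the marker.  */
}
#endif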
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
	       (HOST_WIDE_INT) crtl->args.size,
	       crtl->args.pretend_args_size,
	       (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	unsigned regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* The first STRD performs the whole stack adjustment.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
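
/* Illustrative sketch, not part of the original file: the store plan used
   above.  An odd register count gets one leading STR so that every
   following STRD starts on an 8-byte boundary; the first store performs
   the whole SP adjustment.  Hypothetical plan computation:  */
#if 0
static void
example_strd_push_plan (int num_regs, int *num_str, int *num_strd)
{
  *num_str = num_regs & 1;	/* Leading single STR, only if count is odd.  */
  *num_strd = num_regs / 2;	/* Every remaining register pairs up.  */
}
#endif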
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant(Pmode,
					       stack_pointer_rtx,
					       offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
	     instead we'd have a parallel expression detailing all
	     the stores to the various memory addresses so that debug
	     information is more up-to-date.  Remember however while writing
	     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
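
/* Illustrative sketch, not part of the original file: the frame note built
   above uses one SP decrement of 4 * num_regs and then places the j-th
   saved register at [sp, #4*j].  Hypothetical computation of those note
   offsets from a register mask:  */
#if 0
static void
example_push_multi_note (unsigned long mask, int offsets[16], int *sp_adjust)
{
  int num_regs = 0, j = 0;
  for (int i = 0; i < 16; i++)
    if (mask & (1ul << i))
      num_regs++;
  *sp_adjust = -4 * num_regs;	/* The single stack decrement.  */
  for (int i = 0; i < 16; i++)
    if (mask & (1ul << i))
      offsets[i] = 4 * j++;	/* Register i is stored at sp + 4*j.  */
}
#endif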
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
			       plus_constant (Pmode,
					      stack_pointer_rtx, 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
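
/* Illustrative sketch, not part of the original file: the trailing-load
   decision above.  Since PC can never be half of an LDRD, its count is
   dropped first; a single LDR is then needed exactly when the adjusted
   parity and return_in_pc disagree.  Hypothetical predicate:  */
#if 0
static int
example_ldrd_pop_needs_single_ldr (int num_regs, int return_in_pc)
{
  if (return_in_pc)
    num_regs--;		/* PC is popped separately.  */
  return ((num_regs % 2) == 1 && !return_in_pc)
	 || ((num_regs % 2) == 0 && return_in_pc);
}
#endif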
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */

	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
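
/* Illustrative sketch, not part of the original file: the pairing rule
   used above.  ARM-mode LDRD needs an even-numbered register whose odd
   successor is also being restored, and the pair must not include PC.
   Hypothetical predicate:  */
#if 0
static int
example_arm_ldrd_pair_p (unsigned long mask, int j)
{
  return (j % 2) == 0
	 && (mask & (1ul << j)) != 0
	 && (mask & (1ul << (j + 1))) != 0
	 && (j + 1) != 15;	/* r15 is PC.  */
}
#endif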
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */


/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (reg_needs_saving_p (regno))
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_VFP_BASE)
	saved += arm_get_vfp_saved_size ();

      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
	 nonsecure entry functions with VSTR/VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
	saved += 4;
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
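
/* Illustrative sketch, not part of the original file: both doubleword
   fixups above round a word-aligned offset up to the next multiple of 8
   by adding a single 4-byte pad.  Hypothetical standalone version:  */
#if 0
static unsigned int
example_align_to_doubleword (unsigned int offset)
{
  if (offset & 7)
    offset += 4;	/* Offsets here are already word-aligned.  */
  return offset;
}
#endif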
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */

	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
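
/* Illustrative worked example, not part of the original file, using
   hypothetical offsets saved_args = 0, frame = 4, saved_regs = 24,
   soft_frame = 24 and outgoing_args = 40:
     ARG_POINTER   -> FRAME_POINTER:  24 - 0       = 24
     ARG_POINTER   -> STACK_POINTER:  40 - (0 + 4) = 36
     FRAME_POINTER -> STACK_POINTER:  40 - 24      = 16
   which matches the switch cases above one for one.  */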
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  if (TARGET_REALLY_IWMMXT)
    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
      if (reg_needs_saving_p (reg))
	{
	  insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = gen_rtx_MEM (V2SImode, insn);
	  insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  saved_size += 8;
	}

  if (TARGET_VFP_BASE)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
22991 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22993 #if PROBE_INTERVAL > 4096
22994 #error Cannot use indexed addressing mode for stack probing
22997 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22998 inclusive. These are offsets from the current stack pointer. REGNO1
22999 is the index number of the 1st scratch register and LIVE_REGS is the
23000 mask of live registers. */
static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));

      /* Step 3: the loop

	   do
	     {
	       TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	       probe at TEST_ADDR
	     }
	   while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
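/* Worked example (illustrative only): with PROBE_INTERVAL == 4096,
   FIRST == 1024 and SIZE == 9000, the second branch above probes at
   SP - 5120 (FIRST + PROBE_INTERVAL), then SP - 9216 (FIRST + 2*4096),
   and finally at SP - 10024 (FIRST + SIZE); the final step is emitted as
   two instructions when the residual offset does not fit in the
   immediate field (rem > 4095, or rem > 255 on Thumb-2).  */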
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
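/* For reference, the loop printed by output_probe_stack_range looks like
   this sketch, assembled from the output_asm_insn strings above (register
   numbers and the label spelling depend on the target and on labelno):

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/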
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */

      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }
  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
			|| flag_stack_clash_protection)
		       && !df_regs_ever_live_p (LR_REGNUM)
		       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
	  saved_regs += 4;

	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	}
    }
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
  if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
    {
      saved_regs += 4;
      insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
						GEN_INT (FPCXTNS_ENUM)));
      rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx, -4));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is
     APCS), subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }
  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;

	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;
  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    arm_emit_probe_stack_range (get_stack_check_protect (),
					size - get_stack_check_protect (),
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (get_stack_check_protect (), size,
				    regno, live_regs_mask);
    }
  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	emit_insn (gen_stack_tie (stack_pointer_rtx,
				  hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask, NULL_RTX);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
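/* Illustrative only (an assumption, not emitted verbatim by the code
   above): for a small ARM-state function with an APCS frame the prologue
   typically reduces to the classic sequence

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>

   with variations for interrupt functions, nested functions (static chain
   in ip) and the stack-probe sequences described above.  */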
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
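/* A few illustrative expansions (not exhaustive): for a DImode value in
   r0, %M prints "{r0-r1}"; for a CONST_INT of 8, %B prints the bitwise
   inverse -9, %b prints "#3" (the log2), and %L prints the low 16 bits of
   the constant.  */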
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc ('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc ('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;

	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;

	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
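      /* Example (illustrative): for a DImode value in r2/r3 with
	 WORDS_BIG_ENDIAN false, %Q prints "r2" (least significant word),
	 %R prints "r3" (most significant word) and %H prints "r3" (the
	 higher register number); on a word-big-endian target %Q and %R
	 swap while %H is unchanged.  */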
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;
    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;
    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;
    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;
    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;
    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;
    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
		   + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
		   + (code == 'f' ? 1 : 0));
      }
      return;
    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;
    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */
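    /* Example decoding (illustrative): bits == 3 selects 'f' (float) for
       %T, %F and %t; bits == 6 selects 'p' for %T and %F but 'u' for %t,
       and has bit 2 set, so %O prints "r" (rounding variant).  */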
    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;
    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP (XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs ("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;
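    /* Example output (illustrative): for a 16-byte vld1/vst1 operand whose
       address is 128-bit aligned and post-incremented, the code above
       prints something like "[r0:128]!"; with a POST_MODIFY register
       update it would instead be "[r0:128], r2".  */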
    /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
       rtx_code the memory operands output looks like following.
       1.  [Rn], #+/-<imm>
       2.  [Rn, #+/-<imm>]!
       3.  [Rn, #+/-<imm>]
       4.  [Rn].  */
    case 'E':
      {
	rtx addr;
	rtx postinc_reg = NULL;
	unsigned inc_val = 0;
	enum rtx_code code;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	code = GET_CODE (addr);
	if (code == POST_INC || code == POST_DEC || code == PRE_INC
	    || code == PRE_DEC)
	  {
	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
	    inc_val = GET_MODE_SIZE (GET_MODE (x));
	    if (code == POST_INC || code == POST_DEC)
	      asm_fprintf (stream, "], #%s%d", (code == POST_INC)
			   ? "" : "-", inc_val);
	    else
	      asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
			   ? "" : "-", inc_val);
	  }
	else if (code == POST_MODIFY || code == PRE_MODIFY)
	  {
	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
	    postinc_reg = XEXP (XEXP (addr, 1), 1);
	    if (postinc_reg && CONST_INT_P (postinc_reg))
	      {
		if (code == POST_MODIFY)
		  asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
		else
		  asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
	      }
	  }
	else if (code == PLUS)
	  {
	    rtx base = XEXP (addr, 0);
	    rtx index = XEXP (addr, 1);

	    gcc_assert (REG_P (base) && CONST_INT_P (index));

	    HOST_WIDE_INT offset = INTVAL (index);
	    asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
	  }
	else
	  {
	    gcc_assert (REG_P (addr));
	    asm_fprintf (stream, "[%r]", REGNO (addr));
	  }
      }
      return;

    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;
    /* Translate an S register number into a D register number and element
       index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
	gcc_assert (CONST_DOUBLE_P (x));
	int result;
	result = vfp3_const_double_for_fract_bits (x);
	if (result == 0)
	  result = vfp3_const_double_for_bits (x);
	fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;
    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];

	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as in index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	  else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
	    asm_fprintf (stream, "[%r]!", REGNO (XEXP (x, 0)));
	  else
	    asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  /* References to weak symbols cannot be resolved locally:
	     they may be overridden by a non-weak definition at link
	     time.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF
		  && (!SYMBOL_REF_LOCAL_P (x)
		      || (SYMBOL_REF_DECL (x)
			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
		      || (SYMBOL_REF_FUNCTION_P (x)
			  && !arm_fdpic_local_funcdesc_p (x)))))
	    {
	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
		fputs ("(GOTFUNCDESC)", asm_out_file);
	      else
		fputs ("(GOT)", asm_out_file);
	    }
	  else
	    {
	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
		fputs ("(GOTOFFFUNCDESC)", asm_out_file);
	      else
		{
		  bool is_readonly;

		  if (!TARGET_FDPIC
		      || arm_is_segment_info_known (x, &is_readonly))
		    fputs ("(GOTOFF)", asm_out_file);
		  else
		    fputs ("(GOT)", asm_out_file);
		}
	    }
	}

      /* For FDPIC we also have to mark symbol for .data section.  */
      if (TARGET_FDPIC
	  && !making_const_table
	  && SYMBOL_REF_P (x)
	  && SYMBOL_REF_FUNCTION_P (x))
	fputs ("(FUNCDESC)", asm_out_file);

      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_real
	      (*CONST_DOUBLE_REAL_VALUE (elt),
	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
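/* For example (illustrative): a word-sized reference to an external symbol
   in a PIC constant pool comes out as "\t.word\tfoo(GOT)", whereas with
   FDPIC a function symbol instead gets the "(GOTFUNCDESC)" marker, as
   selected by the tests above.  */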
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
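/* As an illustration of the transformation (a schematic example, not taken
   from real compiler output): a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is collapsed by this fsm into

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch at the cost of making the skipped instruction
   conditional.  */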
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
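/* For instance (illustrative): two COND_EXEC insns predicated on EQ and NE
   respectively give arm_condexec_masklen == 2 with only bit 0 set in the
   mask, which arm_asm_output_opcode prints as "ite eq" ahead of the two
   instructions.  */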
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case E_CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	default: return ARM_NV;
	}

    case E_CC_ADCmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case E_CC_Bmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CCmode:
    case E_CC_RSBmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
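/* Example mappings (illustrative): (ge (reg:CC CC_REGNUM) (const_int 0))
   in plain CCmode yields ARM_GE ("ge"), while the same comparison in
   CC_SWPmode (operands swapped when the flags were set) yields ARM_LE,
   and the ordered/unordered float comparisons in CCFPmode map onto the
   VC/VS conditions.  */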
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targetting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5t)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
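/* E.g. (illustrative): DImode in r0 occupies two core registers (r0-r1),
   so ARM_NUM_REGS yields 2, whereas the special flag registers above
   PC_REGNUM always report a single register.  */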
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_VFP_BASE
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (IS_VPR_REGNUM (regno))
    return mode == HImode;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
    {
      if (mode == DFmode || mode == DImode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode || mode == BFmode || mode == HImode
	  || mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
      if (TARGET_HAVE_MVE)
	return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
		|| (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  The same restriction applies for MVE
     in order to support Armv8.1-M Mainline instructions.
     Do not allow very large Neon structure opaque modes in general
     registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
	return true;

      return !((TARGET_LDRD || TARGET_CDE)
	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}

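/* Two concrete consequences of the rules above (illustrative): in ARM state
   on an LDRD-capable core, DImode is rejected in an odd-numbered core
   register such as r1 but accepted in r4, keeping ldrd/strd usable; and in
   Thumb-1, DImode is accepted only in r0-r6 so that a second low register
   remains free for the upper word.  */
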
/* Implement TARGET_MODES_TIEABLE_P.  */
static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if ((TARGET_NEON
       && (VALID_NEON_DREG_MODE (mode1)
	   || VALID_NEON_QREG_MODE (mode1)
	   || VALID_NEON_STRUCT_MODE (mode1))
       && (VALID_NEON_DREG_MODE (mode2)
	   || VALID_NEON_QREG_MODE (mode2)
	   || VALID_NEON_STRUCT_MODE (mode2)))
      || (TARGET_HAVE_MVE
	  && (VALID_MVE_MODE (mode1)
	      || VALID_MVE_STRUCT_MODE (mode1))
	  && (VALID_MVE_MODE (mode2)
	      || VALID_MVE_STRUCT_MODE (mode2))))
    return true;

  return false;
}

/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (IS_VPR_REGNUM (regno))
    return VPR_REG;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}

/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}

/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}

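/* E.g. (illustrative): with an -mfp16-format option enabled, arithmetic on
   the ARM-specific __fp16 type is performed in float:

	__fp16 a, b;
	float sum (void) { return a + b; }	// operands promote to float

   _Float16 is deliberately not promoted by this hook; it keeps its own type
   and is handled through the excess-precision machinery below.  */
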
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

   0: evaluate all operations and constants, whose semantic type has at
      most the range and precision of type float, to the range and
      precision of float; evaluate all other operations and constants to
      the range and precision of the semantic type;

   N, where _FloatN is a supported interchange floating type
      evaluate all operations and constants, whose semantic type has at
      most the range and precision of _FloatN type, to the range and
      precision of the _FloatN type; evaluate all other operations and
      constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* We can calculate either in 16-bit range and precision or
	 32-bit range and precision.  Make that decision based on whether
	 we have native support for the ARMv8.2-A 16-bit floating-point
	 instructions or not.  */
      return (TARGET_VFP_FP16INST
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	      : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}

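/* A sketch of the effect (hypothetical translation unit):

	_Float16 x, y, z;
	_Float16 r;
	void compute (void) { r = x * y + z; }

   without the fp16 instructions the intermediate product and sum are
   evaluated in float and rounded back to _Float16 once at the end
   (FLT_EVAL_METHOD_PROMOTE_TO_FLOAT); with TARGET_VFP_FP16INST they can
   stay in 16-bit range and precision throughout.  */
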
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}

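/* In practice (illustrative): -mfp16-format=ieee makes _Float16 available
   with HFmode, while -mfp16-format=alternative does not, since the
   alternative half-precision format trades infinities and NaNs for extra
   exponent range and is therefore not the ISO interchange format.  */
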
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}

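/* Worked example (hypothetical register assignment): for a two-component
   copy where OPERANDS[0] overlaps OPERANDS[1] and has the higher register
   number, the second loop above emits the high component first, so no
   source register is overwritten before it has been read.  */
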
/* Split operands into moves from op[1] + op[2] into op[0].  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}

/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}

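/* E.g. number_of_first_bit_set (0x28) == 3: bit 3 is the lowest bit set
   in 0b101000.  */
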
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}

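/* For instance (illustrative): mask == 0x90 with real_regs == mask pushes
   r4 and r7 as a single "push {r4, r7}" and attaches a
   REG_FRAME_RELATED_EXPR note describing the 8-byte SP decrement and the
   two stores, which is the form the unwind-info machinery expects.  */
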
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}

/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
	  if (!TARGET_HAVE_FPCXT_CMSE)
	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
			 reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }
25859 regs_available_for_popping
= 0;
25861 /* If returning via __builtin_eh_return, the bottom three registers
25862 all contain information needed for the return. */
25863 if (crtl
->calls_eh_return
)
25867 /* If we can deduce the registers used from the function's
25868 return value. This is more reliable that examining
25869 df_regs_ever_live_p () because that will be set if the register is
25870 ever used in the function, not just if the register is used
25871 to hold a return value. */
25873 if (crtl
->return_rtx
!= 0)
25874 mode
= GET_MODE (crtl
->return_rtx
);
25876 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
25878 size
= GET_MODE_SIZE (mode
);
25882 /* In a void function we can use any argument register.
25883 In a function that returns a structure on the stack
25884 we can use the second and third argument registers. */
25885 if (mode
== VOIDmode
)
25886 regs_available_for_popping
=
25887 (1 << ARG_REGISTER (1))
25888 | (1 << ARG_REGISTER (2))
25889 | (1 << ARG_REGISTER (3));
25891 regs_available_for_popping
=
25892 (1 << ARG_REGISTER (2))
25893 | (1 << ARG_REGISTER (3));
25895 else if (size
<= 4)
25896 regs_available_for_popping
=
25897 (1 << ARG_REGISTER (2))
25898 | (1 << ARG_REGISTER (3));
25899 else if (size
<= 8)
25900 regs_available_for_popping
=
25901 (1 << ARG_REGISTER (3));
25904 /* Match registers to be popped with registers into which we pop them. */
25905 for (available
= regs_available_for_popping
,
25906 required
= regs_to_pop
;
25907 required
!= 0 && available
!= 0;
25908 available
&= ~(available
& - available
),
25909 required
&= ~(required
& - required
))
25912 /* If we have any popping registers left over, remove them. */
25914 regs_available_for_popping
&= ~available
;
25916 /* Otherwise if we need another popping register we can use
25917 the fourth argument register. */
25918 else if (pops_needed
)
25920 /* If we have not found any free argument registers and
25921 reg a4 contains the return address, we must move it. */
25922 if (regs_available_for_popping
== 0
25923 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
25925 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25926 reg_containing_return_addr
= LR_REGNUM
;
25928 else if (size
> 12)
25930 /* Register a4 is being used to hold part of the return value,
25931 but we have dire need of a free, low register. */
25934 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
25937 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
25939 /* The fourth argument register is available. */
25940 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
25946 /* Pop as many registers as we can. */
25947 thumb_pop (f
, regs_available_for_popping
);
  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in cmse_nonsecure_entry_clear_before_return ().  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
	 by cmse_nonsecure_entry_clear_before_return ().  */
      if (!TARGET_HAVE_FPCXT_CMSE)
	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}

/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NZmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if an unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("unexpected thumb1 far jump");
}

int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}

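/* E.g. thumb_shiftable_const (0x00ff0000) is nonzero (0xff << 16), while
   thumb_shiftable_const (0x00ff00ff) is zero: the set bits do not fit in
   one shifted byte.  */
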
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative as there
     is no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}

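/* To make the threshold concrete (illustrative arithmetic): insns summing
   to func_size == 700 bytes could, in the worst case of one 4-byte
   literal-pool entry per 2-byte insn, span 3 * 700 = 2100 bytes, beyond
   the roughly [-2048, 2046] reach of a Thumb-1 long branch, so the
   far-jump decision is latched for such a function.  */
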
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}

/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}

/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		       next_hi_reg--)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}

/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;

  return machine;
}

/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

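/* This is what backs __builtin_return_address (0), e.g. (illustrative):

	void *who_called_me (void)
	{
	  return __builtin_return_address (0);
	}

   which expands to the LR initial-value pseudo; deeper frame counts yield
   no result here.  */
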
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}

/* Check that FUNC is called with a different mode.  */
bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}

/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}

/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				      stack_pointer_rtx, x));

      RTX_FRAME_RELATED_P (insn) = 1;
    }
)
26691 HOST_WIDE_INT offset
= 0;
26692 unsigned work_register
;
26693 rtx work_reg
, x
, arm_hfp_rtx
;
26695 /* We have been asked to create a stack backtrace structure.
26696 The code looks like this:
26700 0 sub SP, #16 Reserve space for 4 registers.
26701 2 push {R7} Push low registers.
26702 4 add R7, SP, #20 Get the stack pointer before the push.
26703 6 str R7, [SP, #8] Store the stack pointer
26704 (before reserving the space).
26705 8 mov R7, PC Get hold of the start of this code + 12.
26706 10 str R7, [SP, #16] Store it.
26707 12 mov R7, FP Get hold of the current frame pointer.
26708 14 str R7, [SP, #4] Store it.
26709 16 mov R7, LR Get hold of the current return address.
26710 18 str R7, [SP, #12] Store it.
26711 20 add R7, SP, #16 Point at the start of the
26712 backtrace structure.
26713 22 mov FP, R7 Put this value into the frame pointer. */
26715 work_register
= thumb_find_work_register (live_regs_mask
);
26716 work_reg
= gen_rtx_REG (SImode
, work_register
);
26717 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
26719 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26720 stack_pointer_rtx
, GEN_INT (-16)));
26721 RTX_FRAME_RELATED_P (insn
) = 1;
26725 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
26726 RTX_FRAME_RELATED_P (insn
) = 1;
26727 lr_needs_saving
= false;
26729 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
26732 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
26733 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26735 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
26736 x
= gen_frame_mem (SImode
, x
);
26737 emit_move_insn (x
, work_reg
);
26739 /* Make sure that the instruction fetching the PC is in the right place
26740 to calculate "start of backtrace creation code + 12". */
26741 /* ??? The stores using the common WORK_REG ought to be enough to
26742 prevent the scheduler from doing anything weird. Failing that
26743 we could always move all of the following into an UNSPEC_VOLATILE. */
26746 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26747 emit_move_insn (work_reg
, x
);
26749 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26750 x
= gen_frame_mem (SImode
, x
);
26751 emit_move_insn (x
, work_reg
);
26753 emit_move_insn (work_reg
, arm_hfp_rtx
);
26755 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26756 x
= gen_frame_mem (SImode
, x
);
26757 emit_move_insn (x
, work_reg
);
26761 emit_move_insn (work_reg
, arm_hfp_rtx
);
26763 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26764 x
= gen_frame_mem (SImode
, x
);
26765 emit_move_insn (x
, work_reg
);
26767 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26768 emit_move_insn (work_reg
, x
);
26770 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26771 x
= gen_frame_mem (SImode
, x
);
26772 emit_move_insn (x
, work_reg
);
26775 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
26776 emit_move_insn (work_reg
, x
);
26778 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
26779 x
= gen_frame_mem (SImode
, x
);
26780 emit_move_insn (x
, work_reg
);
26782 x
= GEN_INT (offset
+ 12);
26783 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26785 emit_move_insn (arm_hfp_rtx
, work_reg
);
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers.  Such kind of stash may clobber the
	 use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
	 saved; but if the function examines its own return address then
	 the value is still live and we need to avoid using it.  */
      bool return_addr_live
	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			   LR_REGNUM);

      if (lr_needs_saving || return_addr_live)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	      /* If the return address is not live at this point, we
		 can add LR to the list of registers that we can use
		 for pushes.  */
	      if (!return_addr_live)
		pushable_regs |= 1 << LR_REGNUM;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
26881 so we can use r7 as a temporary work register. */
26882 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26883 arm_load_pic_register (live_regs_mask
, NULL_RTX
);
26885 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
26886 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
26887 stack_pointer_rtx
);
26889 size
= offsets
->outgoing_args
- offsets
->saved_args
;
26890 if (flag_stack_usage_info
)
26891 current_function_static_stack_size
= size
;
26893 /* If we have a frame, then do stack checking. FIXME: not implemented. */
26894 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
26895 || flag_stack_clash_protection
)
26897 sorry ("%<-fstack-check=specific%> for Thumb-1");
26899 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26900 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
26905 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26906 GEN_INT (- amount
)));
26907 RTX_FRAME_RELATED_P (insn
) = 1;
26913 /* The stack decrement is too big for an immediate value in a single
26914 insn. In theory we could issue multiple subtracts, but after
26915 three of them it becomes more space efficient to place the full
26916 value in the constant pool and load into a register. (Also the
26917 ARM debugger really likes to see only one stack decrement per
26918 function). So instead we look for a scratch register into which
26919 we can load the decrement, and then we subtract this from the
26920 stack pointer. Unfortunately on the thumb the only available
26921 scratch registers are the argument registers, and we cannot use
26922 these as they may hold arguments to the function. Instead we
26923 attempt to locate a call preserved register which is used by this
26924 function. If we can find one, then we know that it will have
26925 been pushed at the start of the prologue and so we can corrupt
26927 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
26928 if (live_regs_mask
& (1 << regno
))
26931 gcc_assert(regno
<= LAST_LO_REGNUM
);
26933 reg
= gen_rtx_REG (SImode
, regno
);
26935 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
26937 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26938 stack_pointer_rtx
, reg
));
26940 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
26941 plus_constant (Pmode
, stack_pointer_rtx
,
26943 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
26944 RTX_FRAME_RELATED_P (insn
) = 1;
26948 if (frame_pointer_needed
)
26949 thumb_set_frame_pointer (offsets
);
26951 /* If we are profiling, make sure no instructions are scheduled before
26952 the call to mcount. Similarly if the user has requested no
26953 scheduling in the prolog. Similarly if we want non-call exceptions
26954 using the EABI unwinder, to prevent faulting instructions from being
26955 swapped with a stack adjustment. */
26956 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
26957 || (arm_except_unwind_info (&global_options
) == UI_TARGET
26958 && cfun
->can_throw_non_call_exceptions
))
26959 emit_insn (gen_blockage ());
26961 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
26962 if (live_regs_mask
& 0xff)
26963 cfun
->machine
->lr_save_eliminated
= 0;
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
  int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (clear_vfpregs)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      if (!TARGET_HAVE_FPCXT_CMSE)
	{
	  /* Make sure we don't clear the two scratch registers used to clear
	     the relevant FPSCR bits in output_return_instruction.  */
	  emit_use (gen_rtx_REG (SImode, IP_REGNUM));
	  bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
	  emit_use (gen_rtx_REG (SImode, 4));
	  bitmap_clear_bit (to_clear_bitmap, 4);
	}
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (!callee_saved_reg_p (regno)
	  && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
	      || TARGET_HARD_FLOAT))
	bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
	= compute_not_to_clear_mask (result_type, result_rtl, 0,
				     &padding_bits_to_clear);
      if (to_clear_return_mask)
	{
	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    {
	      if (to_clear_return_mask & (1ULL << regno))
		bitmap_clear_bit (to_clear_bitmap, regno);
	    }
	}
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
			clearing_reg);
}

/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}

void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (reg_needs_saving_p (regno))
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}


/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;
  rtx_insn *insn;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_VFP_BASE)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (reg_needs_saving_p (i))
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode, hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
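
/* Illustrative note (not from the original sources): because IP (r12) and
   SP (r13) have adjacent register numbers, rewriting the IP bit of the
   mask as SP above lets a single load-multiple restore the old stack
   pointer directly, ending a simple APCS frame with something like

	ldm	sp, {fp, sp, pc}

   rather than restoring IP and copying it into SP separately.  */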

/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_VFP_BASE)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (reg_needs_saving_p (i))
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
	= emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer arm_expand_prologue on how to save
	     pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (IS_CMSE_ENTRY (func_type))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      /* Clear all caller-saved regs that are not used to return.  */
      cmse_nonsecure_entry_clear_before_return ();

      /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
	 VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE)
	{
	  rtx_insn *insn;

	  insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
						   GEN_INT (FPCXTNS_ENUM)));
	  rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx, 4));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
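
/* Illustrative note (not from the original sources): for a typical
   -fomit-frame-pointer ARM function the expansion above boils down to

	add	sp, sp, #LOCALS		@ free outgoing args and locals
	vldm	sp!, {d8-d9}		@ groups matching the prologue, if any
	pop	{r4, r5, pc}		@ core registers, LR folded into PC

   with a CFA-adjust note after each step keeping the DWARF unwinder in
   sync with the moving stack pointer.  */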

/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char * name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
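
/* Illustrative note (not from the original sources): the stub emitted
   above reads roughly as

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:

   i.e. PC (which reads 8 bytes ahead in ARM state, landing on the Thumb
   entry point) is OR-ed with the Thumb bit and branch-exchanged.  */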

/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
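
/* Illustrative note (not from the original sources): the ordering above
   matters when the destination pair overlaps the address register.  For a
   load of r2/r3 from [r2], emitting

	ldr	r3, [r2, #4]
	ldr	r2, [r2]

   reads the high word first while the base register is still intact; the
   opposite order would clobber r2 before the second load.  */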

const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}

/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}

/* Routines for generating rtl.  */
void
thumb_expand_cpymemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_cpymem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_cpymem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
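
/* Illustrative note (not from the original sources): a 15-byte copy is
   decomposed above as 12 + 2 + 1, i.e. one cpymem12b block move (which
   auto-increments both pointers), then one HImode move at offset 0 and
   one QImode move at offset 2 past the updated pointers.  */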

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}

/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}

/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}

/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  arm_last_printed_arch_string = arm_active_target.arch_name;
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
	     "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
	     floating point instructions are disabled.  So the following check
	     restricts the printing of ".arch_extension mve" and
	     ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
	     this special behaviour because the feature bits "mve" and
	     "mve_float" are not part of "fpu bits", so they are not cleared
	     when -mfloat-abi=soft (i.e. nofp) but the macros TARGET_HAVE_MVE
	     and TARGET_HAVE_MVE_FLOAT are disabled.  */
	  if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
	      || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
		  && !TARGET_HAVE_MVE_FLOAT))
	    continue;

	  /* If every feature bit of this option is set in the target
	     ISA specification, print out the option name.  However,
	     don't print anything if all the bits are part of the
	     FPU specification.  */
	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
	}
    }
}
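
/* Illustrative note (not from the original sources): for
   -march=armv8.1-m.main+mve with a hard float ABI this typically prints

	.arch armv8.1-m.main
	.arch_extension mve

   whereas under -mfloat-abi=soft the mve line is suppressed by the check
   above, since TARGET_HAVE_MVE is then false.  */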

static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
	 generate the tags.

	 Note: it might be better to do this unconditionally, then the
	 assembler would not need to know about all new CPU names as
	 they are added.  */
      if (!arm_active_target.core_name)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
	    {
	      /* Keep backward compatability for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	      arm_last_printed_arch_string = "armv7ve";
	    }
	  else
	    arm_print_asm_arch_directives ();
	}
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
	{
	  asm_fprintf (asm_out_file, "\t.arch %s\n",
		       arm_active_target.core_name + 8);
	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
	}
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
	    asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
			 truncated_name);
	  else
	    asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}

static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}

#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */

static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}

/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);

	  /* With -mpure-code, we cannot load the address from the
	     constant pool: we build it explicitly.  */
	  if (target_pure_code)
	    {
	      fputs ("\tmovs\tr3, #:upper8_15:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	      fputs ("\tlsls r3, #8\n", file);
	      fputs ("\tadds\tr3, #:upper0_7:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	      fputs ("\tlsls r3, #8\n", file);
	      fputs ("\tadds\tr3, #:lower8_15:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	      fputs ("\tlsls r3, #8\n", file);
	      fputs ("\tadds\tr3, #:lower0_7:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	    }
	  else
	    fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}

      if (!target_pure_code)
	{
	  assemble_name (file, label);
	  fputc ('\n', file);
	}

      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
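
/* Illustrative note (not from the original sources): a non-PIC Thumb-1-only
   thunk with a small delta comes out roughly as

	push	{r3}
	ldr	r3, .LTHUMBFUNCn
	mov	r12, r3
	adds	r0, r0, #delta
	pop	{r3}
	bx	r12
   .LTHUMBFUNCn:
	.word	<target>

   i.e. the target address is picked up from an inline literal word.  */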

/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  const bool long_call_p = arm_is_long_call_p (function);

  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.
  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  if (long_call_p)
    {
      emit_move_insn (temp, funexp);
      funexp = temp;
    }
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Indirect calls require a bit of fixup in PIC mode.  */
  if (long_call_p)
    {
      split_all_insns_noflow ();
      arm_reorg ();
    }

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}

/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));

  assemble_start_function (thunk, fnname);
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
  assemble_end_function (thunk, fnname);
}

int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:         gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}

/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}

const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    const function_arg_info &arg,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
	{
	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", arg.type);
	  else if (res > 0)
	    {
	      nregs++;
	      if (res > 1 && warn_psabi)
		inform (input_location,
			"parameter passing for argument of type "
			"%qT changed in GCC 9.1", arg.type);
	    }
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
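
/* Illustrative note (not from the original sources): for
   "int f (int fmt, ...)" under AAPCS the first anonymous argument would
   start in r1, so nregs is 1 and *pretend_size becomes 3 * UNITS_PER_WORD,
   telling the prologue to push r1-r3 contiguously below the stack-passed
   arguments.  */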

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;

  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}

void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}

/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
      || mode == V8BFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  if (TARGET_HAVE_MVE
      && (mode == V2DImode || mode == V4SImode || mode == V8HImode
	  || mode == V16QImode))
    return true;

  if (TARGET_HAVE_MVE_FLOAT
      && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
      && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  if (TARGET_HAVE_MVE)
    switch (mode)
      {
      case E_QImode:
	return V16QImode;
      case E_HImode:
	return V8HImode;
      case E_SImode:
	return V4SImode;

      default:;
      }

  if (TARGET_HAVE_MVE_FLOAT)
    switch (mode)
      {
      case E_HFmode:
	return V8HFmode;
      case E_SFmode:
	return V4SFmode;

      default:;
      }

  return word_mode;
}

/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}

/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
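
/* Illustrative note (not from the original sources): d5 lives in the
   S-accessible half of the register file, so it uses the legacy base of 64
   (64 + 10 here, counting S registers), while d16 is only D-accessible and
   maps into the 256-based range as 256 + 16.  */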

/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}

#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
	{
	  /* Check that the addresses are consecutive.  */
	  e = XEXP (SET_DEST (e), 0);
	  if (GET_CODE (e) == PLUS)
	    gcc_assert (REG_P (XEXP (e, 0))
			&& REGNO (XEXP (e, 0)) == SP_REGNUM
			&& CONST_INT_P (XEXP (e, 1))
			&& offset == INTVAL (XEXP (e, 1)));
	  else
	    gcc_assert (i == 1
			&& REG_P (e)
			&& REGNO (e) == SP_REGNUM);
	  offset += reg_size;
	}
    }

  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}

/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}

/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
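/* Example (illustrative, not from the original file): for a C++ catch
   clause of type "int", the table entry emitted by arm_output_ttype is

	.word	_ZTIi(TARGET2)

   so the linker applies an R_ARM_TARGET2 relocation to the type_info
   reference instead of a plain data relocation.  */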
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_GD32_FDPIC:
      fputs ("(tlsgd_fdpic)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDM32_FDPIC:
      fputs ("(tlsldm_fdpic)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_IE32_FDPIC:
      fputs ("(gottpoff_fdpic)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
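/* Example (illustrative): a general-dynamic TLS reference to a variable
   "x" decorated by arm_emit_tls_decoration prints something like

	.word	x(tlsgd) + (. - .LPIC0 - 8)

   where the label and constant operands come from XVECEXP (x, 0, 2) and
   XVECEXP (x, 0, 3); ".LPIC0" and the 8 are placeholders here, not
   values taken from this file.  */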
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
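/* Example (illustrative): with operands[3] describing an ashift by the
   constant 3 and SET_FLAGS == 2, the pattern built above is
   "lsl%!\t%0, %1, %2", which assembles to e.g. "lsls r0, r1, #3" when the
   16-bit Thumb encoding is available.  The register names are
   placeholders.  */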
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case E_V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case E_V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case E_V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
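/* Example (illustrative): for a dense QImode dispatch table the sequence
   produced above is

	cmp	r0, #N-1
	bhi	.Ldefault
	tbb	[pc, r0]

   i.e. a bounds check followed by a table-branch-byte whose table of
   halved offsets immediately follows the TBB.  Register and label names
   are placeholders.  */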
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
static int
arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
  if (DEBUG_INSN_P (insn))
    return more;

  rtx_code code = GET_CODE (PATTERN (insn));
  if (code == USE || code == CLOBBER)
    return more;

  if (get_attr_type (insn) == TYPE_NO_INSN)
    return more;

  return more - 1;
}

/* Return how many instructions should scheduler lookahead to choose the
   best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision floating point types.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    {
      if (TYPE_MODE (type) == BFmode)
	return "u6__bf16";
      else
	return "Dh";
    }

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
   All modes except THUMB1 have conditional execution.
   If we have conditional arithmetic, return false before reload to
   enable some ifcvt transformations.  */
static bool
arm_have_conditional_execution (void)
{
  bool has_cond_exec, enable_ifcvt_trans;

  /* Only THUMB1 cannot support conditional execution.  */
  has_cond_exec = !TARGET_THUMB1;

  /* Enable ifcvt transformations if we have conditional arithmetic, but only
     before reload.  */
  enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;

  return has_cond_exec && !enable_ifcvt_trans;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}
static unsigned int
arm_autovectorize_vector_modes (vector_modes *modes, bool)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V8QImode);
    }
  return 0;
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_VFP_BASE)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
      if (TARGET_HAVE_MVE)
	fixed_regs[VPR_REGNUM] = 0;
    }

  if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  /* The Q and GE bits are only accessed via special ACLE patterns.  */
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the
     register list is 8-bit.  Normally this means all registers in the list
     must be LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we
     must use 32-bit encodings.  There is one exception: for PUSH, LR in
     HI_REGS can still be used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
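/* Example (illustrative): under Thumb-2, "push {r0-r7, lr}" stays within
   the 8-bit register list plus the LR exception and gets the 2-byte
   encoding, while "push {r0, r8}" contains a high register other than LR
   and is forced to the 4-byte encoding; this function returns 2 and 4
   respectively for those lists.  */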
/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
   rtx, RETURN_PC is true if OPERANDS contains return insn.  WRITE_BACK_P is
   true if OPERANDS contains an insn which explicitly updates the base
   register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is SP
     and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
/* Same as above, but operands are a register/memory pair in SImode.
   Assumes operands has the base register in position 0 and memory in position
   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  int count;
  rtx ops[2];
  int regnum, memnum;
  if (load)
    regnum = 0, memnum = 1;
  else
    regnum = 1, memnum = 0;
  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
  ops[memnum] = adjust_address (operands[2], DImode, 0);
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ((value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}
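/* Worked example (illustrative): for operand 0.125 the exact inverse is
   8.0, which truncates exactly to the integer 8 = 2^3, so the function
   returns 3 -- the number of fraction bits for a fixed-point vcvt
   conversion.  For 0.3 the inverse is not exactly representable, so the
   function returns 0.  */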
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    cmp_mode = E_SImode;
  else
    cmp_mode = CC_Zmode;

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
					    oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
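/* Example (illustrative, not part of the original file): compiling

     int cas (int *p, int o, int n)
     {
       return __atomic_compare_exchange_n (p, &o, n, 0,
					   __ATOMIC_SEQ_CST,
					   __ATOMIC_SEQ_CST);
     }

   for ARMv7-A goes through arm_expand_compare_and_swap and, after the
   post-reload split below, yields a loop of roughly this shape:

	1:	ldrex	r3, [r0]
		cmp	r3, r1
		bne	2f
		strex	r12, r2, [r0]
		cmp	r12, #0
		bne	1b
	2:	...

   The exact register choice is the allocator's; this is only a sketch.  */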
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (ie. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s_rtx = operands[6];
  mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
						    label2, cond));
      else
	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
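/* Example (illustrative): __atomic_fetch_add (p, 1, __ATOMIC_RELAXED) on
   ARMv7-A is split by this function into a loop of roughly this shape
   (registers are the allocator's choice):

	1:	ldrex	r3, [r0]
		add	r2, r3, #1
		strex	r1, r2, [r0]
		cmp	r1, #0
		bne	1b

   OLD_OUT receives the loaded value (r3 here) and NEW_OUT the result
   (r2); no barriers are emitted for a relaxed memory model.  */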
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
   If CAN_INVERT, store either the result or its inverse in TARGET
   and return true if TARGET contains the inverse.  If !CAN_INVERT,
   always store the result in TARGET, never its inverse.

   Note that the handling of floating-point comparisons is not
   IEEE compliant.  */

static bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
			   bool can_invert)
{
  machine_mode cmp_result_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (op0);

  bool inverted;
  switch (code)
    {
      /* For these we need to compute the inverse of the requested
	 comparison.  */
    case UNORDERED:
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case UNEQ:
    case NE:
      code = reverse_condition_maybe_unordered (code);
      if (!can_invert)
	{
	  /* Recursively emit the inverted comparison into a temporary
	     and then store its inverse in TARGET.  This avoids reusing
	     TARGET (which for integer NE could be one of the inputs).  */
	  rtx tmp = gen_reg_rtx (cmp_result_mode);
	  if (arm_expand_vector_compare (tmp, code, op0, op1, true))
	    gcc_unreachable ();
	  emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
	  return false;
	}
      inverted = true;
      break;

    default:
      inverted = false;
      break;
    }

  switch (code)
    {
      /* These are natively supported for zero comparisons, but otherwise
	 require the operands to be swapped.  */
    case LE:
    case LT:
      if (op1 != CONST0_RTX (cmp_mode))
	{
	  code = swap_condition (code);
	  std::swap (op0, op1);
	}
      /* Fall through.  */

      /* These are natively supported for both register and zero operands.  */
    case EQ:
    case GE:
    case GT:
      emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
      return inverted;

      /* These are natively supported for register operands only.
	 Comparisons with zero aren't useful and should be folded
	 or canonicalized by target-independent code.  */
    case GEU:
    case GTU:
      emit_insn (gen_neon_vc (code, cmp_mode, target,
			      op0, force_reg (cmp_mode, op1)));
      return inverted;

      /* These require the operands to be swapped and likewise do not
	 support comparisons with zero.  */
    case LEU:
    case LTU:
      emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
			      target, force_reg (cmp_mode, op1), op0));
      return inverted;

      /* These need a combination of two comparisons.  */
    case LTGT:
    case ORDERED:
      {
	/* Operands are LTGT iff (a > b || a < b).
	   Operands are ORDERED iff (a > b || a <= b).  */
	rtx gt_res = gen_reg_rtx (cmp_result_mode);
	rtx alt_res = gen_reg_rtx (cmp_result_mode);
	rtx_code alt_code = (code == LTGT ? LT : LE);
	if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
	    || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
	  gcc_unreachable ();
	emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
						     gt_res, alt_res)));
	return inverted;
      }

    default:
      gcc_unreachable ();
    }
}
/* Expand a vcond or vcondu pattern with operands OPERANDS.
   CMP_RESULT_MODE is the mode of the comparison result.  */

void
arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
  rtx mask = gen_reg_rtx (cmp_result_mode);
  bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
					     operands[4], operands[5], true);
  if (inverted)
    std::swap (operands[1], operands[2]);
  emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
			    mask, operands[1], operands[2]));
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  mask = gen_const_vec_duplicate (vmode, mask);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
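/* Example (illustrative): permuting two V8QI vectors, nelt == 8, so the
   selector is ANDed with a vector of 15 (2 * nelt - 1); any out-of-range
   VEC_PERM_EXPR index therefore wraps modulo 16 before the VTBL lookup.
   This matters because VTBL itself writes zero for out-of-range indices,
   whereas VEC_PERM_EXPR requires modulo semantics.  */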
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */
static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen) (machine_mode, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	case E_V8HFmode:
	case E_V4HFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev16;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V4SImode:
	case E_V2SImode:
	case E_V4SFmode:
	case E_V2SFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->vmode, d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);

  if(d->vmode == E_DImode)
    return false;

  emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
  return true;
}
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      d->perm.rotate_inputs (1);
      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
			      const vec_perm_indices &sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
    return false;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = !target;

  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);

  if (!d.testing_p)
    return arm_expand_vec_perm_const_1 (&d);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  bool ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	return code != ARM_PRE_DEC;
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
31391 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31392 on ARM, since we know that shifts by negative amounts are no-ops.
31393 Additionally, the default expansion code is not available or suitable
31394 for post-reload insn splits (this can occur when the register allocator
31395 chooses not to do a shift in NEON).
31397 This function is used in both initial expand and post-reload splits, and
31398 handles all kinds of 64-bit shifts.
31400 Input requirements:
31401 - It is safe for the input and output to be the same register, but
31402 early-clobber rules apply for the shift amount and scratch registers.
31403 - Shift by register requires both scratch registers. In all other cases
31404 the scratch registers may be NULL.
31405 - Ashiftrt by a register also clobbers the CC register. */
31407 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
31408 rtx amount
, rtx scratch1
, rtx scratch2
)
31410 rtx out_high
= gen_highpart (SImode
, out
);
31411 rtx out_low
= gen_lowpart (SImode
, out
);
31412 rtx in_high
= gen_highpart (SImode
, in
);
31413 rtx in_low
= gen_lowpart (SImode
, in
);
31416 in = the register pair containing the input value.
31417 out = the destination register pair.
31418 up = the high- or low-part of each pair.
31419 down = the opposite part to "up".
31420 In a shift, we can consider bits to shift from "up"-stream to
31421 "down"-stream, so in a left-shift "up" is the low-part and "down"
31422 is the high-part of each register pair. */
31424 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
31425 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
31426 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
31427 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
31429 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
31431 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
31432 && GET_MODE (out
) == DImode
);
31434 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
31435 && GET_MODE (in
) == DImode
);
31437 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
31438 && GET_MODE (amount
) == SImode
)
31439 || CONST_INT_P (amount
)));
31440 gcc_assert (scratch1
== NULL
31441 || (GET_CODE (scratch1
) == SCRATCH
)
31442 || (GET_MODE (scratch1
) == SImode
31443 && REG_P (scratch1
)));
31444 gcc_assert (scratch2
== NULL
31445 || (GET_CODE (scratch2
) == SCRATCH
)
31446 || (GET_MODE (scratch2
) == SImode
31447 && REG_P (scratch2
)));
31448 gcc_assert (!REG_P (out
) || !REG_P (amount
)
31449 || !HARD_REGISTER_P (out
)
31450 || (REGNO (out
) != REGNO (amount
)
31451 && REGNO (out
) + 1 != REGNO (amount
)));
31453 /* Macros to make following code more readable. */
31454 #define SUB_32(DEST,SRC) \
31455 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31456 #define RSB_32(DEST,SRC) \
31457 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31458 #define SUB_S_32(DEST,SRC) \
31459 gen_addsi3_compare0 ((DEST), (SRC), \
31461 #define SET(DEST,SRC) \
31462 gen_rtx_SET ((DEST), (SRC))
31463 #define SHIFT(CODE,SRC,AMOUNT) \
31464 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31465 #define LSHIFT(CODE,SRC,AMOUNT) \
31466 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31467 SImode, (SRC), (AMOUNT))
31468 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31469 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31470 SImode, (SRC), (AMOUNT))
31472 gen_rtx_IOR (SImode, (A), (B))
31473 #define BRANCH(COND,LABEL) \
31474 gen_arm_cond_branch ((LABEL), \
31475 gen_rtx_ ## COND (CCmode, cc_reg, \
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 ASHIFT:
	 out_down = in_down << amount;
	 out_down = (in_up << (amount - 32)) | out_down;
	 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	 out_up = in_up << amount;

	 ASHIFTRT:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount < 32)
	   out_down = ((signed)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 LSHIFTRT:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount < 32)
	   out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
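
/* As an illustration of the shift-by-register path above, a 64-bit
   LSHIFTRT in ARM mode emits a sequence roughly like:

	rsb	scratch1, amount, #32	@ scratch1 = 32 - amount
	sub	scratch2, amount, #32	@ scratch2 = amount - 32
	lsr	out_down, in_down, amount
	orr	out_down, out_down, in_up, lsl scratch1
	orr	out_down, out_down, in_up, lsr scratch2
	lsr	out_up, in_up, amount

   relying on ARM register-specified shifts producing zero for counts of
   32 or more (a "negative" scratch value is seen as a large positive
   count), so the ORR term for the inapplicable half contributes zero;
   at a count of exactly 32 both terms coincide, so the result is still
   correct.  */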
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (target_word_relocations)
    return false;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
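
/* For example, (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 0x7fff))) are accepted
   above, while an addend of 0x8000 is rejected because it is not
   representable as a 16-bit signed REL addend.  */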
/* Returns true if this is a valid comparison operation, and puts
   the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      /* gen_compare_reg() will sort out any invalid operands.  */
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
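
/* A worked example of the costing above: length == 11 and align == 4
   with neither the unaligned nor the strd path gives
     num = cost-of-constant + (11 >> 2) + leftover[11 & 3]
	 = cost-of-constant + 2 + 2,
   and when unaligned access is available the trailing STRH/STRB pair
   is counted as a single STR, reducing num by one.  */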
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
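
/* A worked example: length == 18 with 4-byte alignment and V16QImode
   gives num = 1 (load constant) + 2 (ceiling of 18 / 16 stores)
   + 1 (address adjustment, since 18 & 3 != 0) - 1 (first vst1:v16qi),
   i.e. 3 instructions to compare against arm_block_set_max_insns.  */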
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;
  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storev8qi (mem, reg));
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_insn (gen_unaligned_storev8qi (mem, reg));
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i = 0;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	    emit_move_insn (mem, reg);
	  else
	    emit_insn (gen_unaligned_storedi (mem, reg));
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
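
/* For example, a call such as memset (p, 0x55, 16) with a word-aligned
   P on a NEON tune that sets string_ops_prefer_neon is expanded here
   via arm_block_set_vect into a vector constant load followed by
   vector stores, instead of a call to the library memset.  */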
/* Implement the TARGET_SCHED_MACRO_FUSION_P hook.  */

static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
					(const_int 16)
					(const_int 16))
		     (const_int imm16_1))
     or
     prev (movw) == (set (reg r1)
			 (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
			 (lo_sum (reg r1)
				 (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
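
/* The second pattern above corresponds to an assembly pair such as

	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM

   and keeping the two halves of the 32-bit constant adjacent lets
   cores that implement MOVW/MOVT fusion treat them as one operation.  */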
/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
	return false;
      default:
	return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
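
/* With this offset the ASan shadow mapping on 32-bit ARM is
     shadow = (addr >> ASAN_SHADOW_SHIFT) + (1 << 29)
   so, for instance, an access at 0x40000000 is checked via the shadow
   byte at 0x28000000.  */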
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}

/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);

  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
	= TREE_TARGET_OPTION (caller_tree ? caller_tree
					  : target_option_default_node);

  struct cl_target_option *callee_opts
	= TREE_TARGET_OPTION (callee_tree ? callee_tree
					  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
			      false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
			      false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      argstr = NULL;
      if (!strcmp (q, "thumb"))
	{
	  opts->x_target_flags |= MASK_THUMB;
	  if (TARGET_FDPIC && !arm_arch_thumb2)
	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
	}

      else if (!strcmp (q, "arm"))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strcmp (q, "general-regs-only"))
	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;

      else if (!strncmp (q, "fpu=", 4))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for target attribute or pragma %qs", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else if (!strncmp (q, "arch=", 5))
	{
	  char *arch = q + 5;
	  const arch_option *arm_selected_arch
	     = arm_parse_arch_option_name (all_architectures, "arch", arch);

	  if (!arm_selected_arch)
	    {
	      error ("invalid architecture for target attribute or pragma %qs",
		     q);
	      return false;
	    }

	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
	}
      else if (q[0] == '+')
	{
	  opts->x_arm_arch_string
	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
	}
      else
	{
	  error ("unknown target attribute or pragma %qs", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts, opts_set);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts, opts_set);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternately on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options, func_options_set;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);
  memset (&func_options_set, 0, sizeof (func_options_set));

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options, &func_options_set,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options, &func_options_set,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &func_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options, &func_options_set);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits_internal);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  /* Only update the assembler .arch string if it is distinct from the last
     such string we printed.  arch_to_print is set conditionally in case
     targ_options->x_arm_arch_string is NULL which can be the case
     when cc1 is invoked directly without passing -march option.  */
  std::string arch_to_print;
  if (targ_options->x_arm_arch_string)
    arch_to_print = targ_options->x_arm_arch_string;

  if (arch_to_print != arm_last_printed_arch_string)
    {
      std::string arch_name
	= arch_to_print.substr (0, arch_to_print.find ("+"));
      asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
      const arch_option *arch
	= arm_parse_arch_option_name (all_architectures, "-march",
				      targ_options->x_arm_arch_string);
      auto_sbitmap opt_bits (isa_num_bits);

      gcc_assert (arch);
      if (arch->common.extensions)
	{
	  for (const struct cpu_arch_extension *opt = arch->common.extensions;
	       opt->name != NULL;
	       opt++)
	    {
	      if (!opt->remove)
		{
		  arm_initialize_isa (opt_bits, opt->isa_bits);
		  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft"
		     and "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and
		     MVE with floating point instructions is disabled.  So the
		     following check restricts the printing of ".arch_extension
		     mve" and ".arch_extension fp" (for mve.fp) in the assembly
		     file.  MVE needs this special behaviour because the
		     feature bits "mve" and "mve_float" are not part of
		     "fpu bits", so they are not cleared when -mfloat-abi=soft
		     (i.e. nofp) but the macros TARGET_HAVE_MVE and
		     TARGET_HAVE_MVE_FLOAT are disabled.  */
		  if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
		      || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
			  && !TARGET_HAVE_MVE_FLOAT))
		    continue;

		  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
		      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
		    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
				 opt->name);
		}
	    }
	}

      arm_last_printed_arch_string = arch_to_print;
    }

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  std::string fpu_to_print
    = TARGET_SOFT_FLOAT
      ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);

  if (!(!strcmp (fpu_to_print.c_str (), "softvfp") && TARGET_VFP_BASE)
      && (fpu_to_print != arm_last_printed_fpu_string))
    {
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
      arm_last_printed_fpu_string = fpu_to_print;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

		 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
	      | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
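
/* For example, for V4SImode with HIGH == true this returns
   (parallel [2 3]) on little-endian but (parallel [0 1]) on
   big-endian, matching the table in the comment above.  */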
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;
      if (flags & SECTION_WRITE)
	*num |= 0x1;
      if (flags & SECTION_CODE)
	*num |= 0x4;
      if (flags & SECTION_MERGE)
	*num |= 0x10;
      if (flags & SECTION_STRINGS)
	*num |= 0x20;
      if (flags & SECTION_TLS)
	*num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;

      return true;
    }

  return false;
}
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
						       exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer may
   contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode, op0, mode, op1, mode);

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
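
/* In the AEABI convention the combined libcall returns the quotient in
   the low half of the double-width value and the remainder in the high
   half (r0/r1 for __aeabi_idivmod), which is why the two subregs above
   are taken at offsets 0 and GET_MODE_SIZE (mode).  */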
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
	if (arm_arch4)
	  return true;
	break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch5t)
	  return true;
	break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch6 || arm_arch5te)
	  return true;
	break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
	if (arm_arch6)
	  return true;
	break;
      default:
	gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
      case PLUS:
	{
	  /* Or registers with an offset.  */
	  if (!REG_P (XEXP (op, 0)))
	    return false;

	  op = XEXP (op, 1);

	  /* The offset must be an immediate though.  */
	  if (!CONST_INT_P (op))
	    return false;

	  range = INTVAL (op);

	  /* Within the range of [-1020,1020].  */
	  if (!IN_RANGE (range, -1020, 1020))
	    return false;

	  /* And a multiple of 4.  */
	  return (range % 4) == 0;
	}
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
	return REG_P (XEXP (op, 0));
      default:
	gcc_unreachable ();
    }
  return false;
}
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
arm_invalid_conversion (const_tree fromtype, const_tree totype)
{
  if (element_mode (fromtype) != element_mode (totype))
    {
      /* Do not allow conversions to/from BFmode scalar types.  */
      if (TYPE_MODE (fromtype) == BFmode)
	return N_("invalid conversion from type %<bfloat16_t%>");
      if (TYPE_MODE (totype) == BFmode)
	return N_("invalid conversion to type %<bfloat16_t%>");
    }

  /* Conversion allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
arm_invalid_unary_op (int op, const_tree type)
{
  /* Reject all single-operand operations on BFmode except for &.  */
  if (element_mode (type) == BFmode && op != ADDR_EXPR)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
		       const_tree type2)
{
  /* Reject all 2-operand operations on BFmode.  */
  if (element_mode (type1) == BFmode
      || element_mode (type2) == BFmode)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */
static void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}
/* Have we recorded an explicit access to the Q bit of APSR?  */
bool
arm_q_bit_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle qbit",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}

/* Have we recorded an explicit access to the GE bits of PSTATE?  */
bool
arm_ge_bits_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle gebits",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}
/* Return NULL if insn INSN is valid within a low-overhead loop.
   Otherwise return why doloop cannot be applied.  */

static const char *
arm_invalid_within_doloop (const rtx_insn *insn)
{
  if (!TARGET_HAVE_LOB)
    return default_invalid_within_doloop (insn);

  if (CALL_P (insn))
    return "Function call in the loop.";

  if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
    return "LR is used inside loop.";

  return NULL;
}
bool
arm_target_insn_ok_for_lob (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  /* Make sure the basic block of the target insn is a simple latch
     having as single predecessor and successor the body of the loop
     itself.  Only simple loops with a single basic block as body are
     supported for 'low overhead loop', making sure that LE target is
     above LE itself in the generated code.  */

  return single_succ_p (bb)
    && single_pred_p (bb)
    && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
    && contains_no_active_insn_p (bb);
}
#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
      bitmap_clear (isa_all_fpubits_internal);
      bitmap_copy (isa_all_fpubits_internal, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits_internal))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
	       " group that are not defined by any FPU.\n"
	       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
    }
}

/* Run all target-specific selftests.  */

static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */
/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
   Unlike the arm version, we do NOT implement asm flag outputs.  */

static rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
		      vec<const char *> &constraints,
		      vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
{
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    if (strncmp (constraints[i], "=@cc", 4) == 0)
      {
	sorry ("asm flags not supported in thumb1 mode");
	break;
      }
  return NULL;
}
/* Generate code to enable conditional branches in functions over 1 MiB.
   Parameters are:
     operands: is the operands list of the asm insn (see arm_cond_branch or
       arm_cond_branch_reversed).
     pos_label: is an index into the operands array where operands[pos_label] is
       the asm label of the final jump destination.
     dest: is a string which is used to generate the asm label of the
       intermediate destination.
     branch_format: is a string denoting the intermediate branch format, e.g.
       "beq", "bne", etc.  */

const char *
arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
		    const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest, \
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = arm_strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}
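
/* For example, when a "beq" to OPERANDS[POS_LABEL] would be out of
   range, the caller passes the reversed format and this emits roughly:

	bne	.LCB<n>
	b	<final destination>
     .LCB<n>:

   so only the unconditional branch needs long-range reach.  */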
/* If the given mode matches, load from memory to LO_REGS.
   (i.e. [Rn], Rn <= LO_REGS).  */
enum reg_class
arm_mode_base_reg_class (machine_mode mode)
{
  if (TARGET_HAVE_MVE
      && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
    return LO_REGS;

  return MODE_BASE_REG_REG_CLASS (mode);
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"