/* Output routines for GCC for ARM.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif

static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);

static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx,
				       rtx *, rtx *);
static machine_mode arm_floatn_mode (int, bool);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_entry, false },
  { "cmse_nonsecure_call", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_call, true },
  { NULL, 0, 0, false, false, false, NULL, false }
};
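/* Illustrative usage sketch (not part of the original file): how user code
   would apply the machine attributes registered above.  The function names
   below are hypothetical.  */
#if 0
void far_away_helper (void) __attribute__ ((long_call));   /* Forced indirect call.  */
void near_helper (void) __attribute__ ((short_call));      /* Direct 26-bit call.  */
void uart_handler (void) __attribute__ ((isr ("IRQ")));    /* ISR prologue/epilogue.  */
void stub (void) __attribute__ ((naked));                  /* No prologue/epilogue.  */
int  gateway (int) __attribute__ ((cmse_nonsecure_entry)); /* ARMv8-M secure entry.  */
#endif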
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
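/* Illustrative arithmetic (not in the original): anchors can thus address
   offsets in [-4088, +4095], a block of 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, hence the "divisible by eight" spacing above.  */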
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
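/* Illustrative sketch (not in the original): with -mrestrict-it each IT
   opcode guards a single conditional insn, e.g.

       it    eq
       addeq r0, r0, #1

   while the full Thumb-2 encoding allows up to four, e.g. "ittee eq"
   followed by two EQ-conditioned and two NE-conditioned insns.  */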
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
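/* Illustrative sketch (not in the original): rather than branching around
   a short arm, final_prescan_insn can predicate the skipped insns, e.g.

       cmp   r0, #0          @ if (r0 != 0)
       movne r1, #1          @   r1 = 1;
       addne r2, r2, #4      @   r2 += 4;

   max_insns_skipped bounds how many insns may be conditionalised this way.  */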
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;
/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;
/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
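/* Illustrative note (not in the original, and assuming the usual numbering
   where SP, PC and the PIC register lie outside r0-r7): the mask reduces to
   0xff & ~(1 << THUMB_HARD_FRAME_POINTER_REGNUM) == 0x7f, i.e. r0-r6.  */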
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  unsigned int tune_flags;
  enum base_architecture base_arch;
  enum isa_feature isa_bits[isa_num_bits];
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
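/* Illustrative expansion (not in the original): a tuning that benefits
   from prefetching could use, e.g., ARM_PREFETCH_BENEFICIAL (4, 32768, 64),
   i.e. four prefetch slots, a 32K L1 cache and 64-byte cache lines, while
   ARM_PREFETCH_NOT_BENEFICIAL leaves the cache sizes unknown (-1).  */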
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (3),	/* simple.  */
    COSTS_N_INSNS (3),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (3),	/* add.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (4),	/* extend.  */
    COSTS_N_INSNS (4),	/* extend_add.  */
    COSTS_N_INSNS (2),	/* load.  */
    COSTS_N_INSNS (2),	/* load_sign_extend.  */
    COSTS_N_INSNS (2),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* loadf.  */
    COSTS_N_INSNS (5),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (2),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (14),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (1),	/* fpconst.  */
    COSTS_N_INSNS (1),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (24),	/* div.  */
    COSTS_N_INSNS (5),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (1),	/* fpconst.  */
    COSTS_N_INSNS (1),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    0,			/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    0,			/* log_shift_reg.  */
    0,			/* extend_arith.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* simple.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (1),	/* extend_add.  */
    COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (1),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* loadf.  */
    COSTS_N_INSNS (1),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (1),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (36),	/* div.  */
    COSTS_N_INSNS (11),	/* mult.  */
    COSTS_N_INSNS (20),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (9),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (6),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (8),	/* toint.  */
    COSTS_N_INSNS (8),	/* fromint.  */
    COSTS_N_INSNS (8)	/* roundint.  */
    COSTS_N_INSNS (64),	/* div.  */
    COSTS_N_INSNS (16),	/* mult.  */
    COSTS_N_INSNS (25),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (9),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (6),	/* compare.  */
    COSTS_N_INSNS (6),	/* widen.  */
    COSTS_N_INSNS (6),	/* narrow.  */
    COSTS_N_INSNS (8),	/* toint.  */
    COSTS_N_INSNS (8),	/* fromint.  */
    COSTS_N_INSNS (8)	/* roundint.  */
    COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa5_extra_costs =
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (1),	/* extend_add.  */
    COSTS_N_INSNS (7)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (6),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (15),	/* div.  */
    COSTS_N_INSNS (3),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (30),	/* div.  */
    COSTS_N_INSNS (6),	/* mult.  */
    COSTS_N_INSNS (10),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (1),	/* extend_add.  */
    COSTS_N_INSNS (7)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (2),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (15),	/* div.  */
    COSTS_N_INSNS (3),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (30),	/* div.  */
    COSTS_N_INSNS (6),	/* mult.  */
    COSTS_N_INSNS (10),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa12_extra_costs =
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (2),	/* simple.  */
    COSTS_N_INSNS (3),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (3),	/* add.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (18)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (3),	/* extend.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (3),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    0,			/* load_unaligned.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (17),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (2),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (31),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (2),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (2),	/* simple.  */
    COSTS_N_INSNS (3),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (2),	/* add.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (18)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (3),	/* extend.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (4),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    0,			/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (17),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (5),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (31),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (2),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table v7m_extra_costs =
    0,			/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* simple.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (8)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (2),	/* load.  */
    0,			/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    1,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    1,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (3),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (7),	/* div.  */
    COSTS_N_INSNS (2),	/* mult.  */
    COSTS_N_INSNS (5),	/* mult_addsub.  */
    COSTS_N_INSNS (3),	/* fma.  */
    COSTS_N_INSNS (1),	/* addsub.  */
    COSTS_N_INSNS (15),	/* div.  */
    COSTS_N_INSNS (5),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (1)	/* alu.  */
1736 const struct tune_params arm_slowmul_tune
=
1738 &generic_extra_costs
, /* Insn extra costs. */
1739 NULL
, /* Sched adj cost. */
1740 arm_default_branch_cost
,
1741 &arm_default_vec_cost
,
1742 3, /* Constant limit. */
1743 5, /* Max cond insns. */
1744 8, /* Memset max inline. */
1745 1, /* Issue rate. */
1746 ARM_PREFETCH_NOT_BENEFICIAL
,
1747 tune_params::PREF_CONST_POOL_TRUE
,
1748 tune_params::PREF_LDRD_FALSE
,
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1750 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1751 tune_params::DISPARAGE_FLAGS_NEITHER
,
1752 tune_params::PREF_NEON_64_FALSE
,
1753 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1754 tune_params::FUSE_NOTHING
,
1755 tune_params::SCHED_AUTOPREF_OFF
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_qdf24xx_tune =
{
  &qdf24xx_extra_costs,
  NULL,					/* Scheduler cost adjustment.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL (0, -1, 64),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */

const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
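/* Illustrative use: callers pass optimize_size, so

     int limit = arm_constant_limit (optimize_size);

   yields 1 under -Os (never spend more than one insn per constant) and
   the tuning table's value otherwise, e.g. 3 when arm_slowmul_tune is
   the active tuning.  */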
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
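/* Worked example (illustrative): for value = 0b101100 the loop runs
   three times:

     0b101100 & 0b101011 = 0b101000
     0b101000 & 0b100111 = 0b100000
     0b100000 & 0b011111 = 0b000000

   so bit_count returns 3 -- one iteration per set bit rather than one
   per bit position.  */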
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
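/* Illustrative example, assuming the fixed_arith_modes table below pairs
   SQmode with the name "sq":

     arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3);

   registers "__gnu_addsq3" as the SQmode addition libcall, matching the
   __gnu_-prefixed entry points provided by libgcc's fixed-point code.  */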
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
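/* Illustrative example, assuming entries pairing HQmode with "hq" and
   SQmode with "sq": both are signed fract modes, so the "2" suffix
   applies and

     arm_set_fixed_conv_libfunc (fract_optab, SQmode, HQmode,
				 "fract", "sq", "hq");

   registers "__gnu_fracthqsq2".  A conversion to a non-fixed-point mode
   such as SFmode takes no suffix, giving e.g. "__gnu_fracthqsf".  */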
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
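  /* Illustrative consequence: on cores without hardware divide, a plain
     C division such as

       long long quot = a / b;

     lowers to a call to __aeabi_ldivmod with the quotient in {r0, r1},
     and a 32-bit division whose remainder is unused can use the cheaper
     __aeabi_idiv entry point registered above.  */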
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
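  /* Illustrative consequence: with the arithmetic libfuncs set to NULL,
     code such as

       __fp16 a, b;
       float r = a + b;

     performs the addition in SFmode; only the operand widenings go
     through the conversion helpers registered above (__gnu_h2f_ieee or
     __gnu_h2f_alternative, depending on arm_fp16_format).  */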
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      /* Table of { machine_mode, name } entries (contents elided).  */
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      /* Table of { machine_mode, name } entries (contents elided).  */
    };
  unsigned int i, j;
  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
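/* Illustrative view: on an AAPCS target the type built above is what a C
   user would get from

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;

   which is why both the tag and the field name are fixed: the tag
   appears in C++ mangled names and the field is visible through
   <stdarg.h>.  */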
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* We only support pure-code on Thumb-2 M-profile targets.  */
  if (target_pure_code
      && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
    error ("-mpure-code only supports non-pic code on armv7-m targets");
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = arm_restrict_it ? 1 : 4;
    }
  else
    /* When -mrestrict-it is in use tone down the if-conversion.  */
    max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
      ? 1 : current_tune->max_insns_skipped;
}
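/* Worked example (illustrative): Thumb-1 load offsets are unsigned 5-bit
   fields scaled by the access size, so from a section anchor the
   reachable byte offsets are 0..31 for LDRB, 0..62 for LDRH and 0..124
   for LDR; the [0, 127] window above covers all three encodings.  */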
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;
static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_align_functions <= 0)
    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? 2 : 4;
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}
static void
arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with, eg, -march=armv3.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB");  */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
     i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
/* Convert a static initializer array of feature bits to sbitmap
   representation.  */
static void
arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
{
  bitmap_clear (isa);
  while (*isa_bits != isa_nobit)
    bitmap_set_bit (isa, *(isa_bits++));
}

static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;
/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const struct processors *arm_selected_tune = NULL;
  const struct processors *arm_selected_arch = NULL;
  const struct processors *arm_selected_cpu = NULL;
  const struct arm_fpu_desc *arm_selected_fpu = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_option)
    arm_selected_arch = &all_architectures[opts->x_arm_arch_option];

  if (opts_set->x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
      arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
    }

  if (opts_set->x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
)
3078 arm_initialize_isa (target
->isa
, arm_selected_arch
->isa_bits
);
3080 if (arm_selected_cpu
)
3082 auto_sbitmap
cpu_isa (isa_num_bits
);
3084 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->isa_bits
);
3085 bitmap_xor (cpu_isa
, cpu_isa
, target
->isa
);
3086 /* Ignore any bits that are quirk bits. */
3087 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_quirkbits
);
3088 /* Ignore (for now) any bits that might be set by -mfpu. */
3089 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_all_fpubits
);
3091 if (!bitmap_empty_p (cpu_isa
))
3093 if (warn_compatible
)
3094 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3095 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3096 /* -march wins for code generation.
3097 -mcpu wins for default tuning. */
3098 if (!arm_selected_tune
)
3099 arm_selected_tune
= arm_selected_cpu
;
3101 arm_selected_cpu
= arm_selected_arch
;
3102 target
->arch_name
= arm_selected_arch
->name
;
3106 /* Architecture and CPU are essentially the same.
3107 Prefer the CPU setting. */
3108 arm_selected_arch
= NULL
;
3109 target
->core_name
= arm_selected_cpu
->name
;
3114 /* Pick a CPU based on the architecture. */
3115 arm_selected_cpu
= arm_selected_arch
;
3116 target
->arch_name
= arm_selected_arch
->name
;
3117 /* Note: target->core_name is left unset in this path. */
3120 else if (arm_selected_cpu
)
3122 target
->core_name
= arm_selected_cpu
->name
;
3123 arm_initialize_isa (target
->isa
, arm_selected_cpu
->isa_bits
);
  /* If the user did not specify a processor, choose one for them.  */
  else
    {
      const struct processors *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      gcc_assert (arm_selected_cpu->name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->isa_bits);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  bitmap_set_bit (sought_isa, isa_bit_thumb);
	  bitmap_set_bit (sought_isa, isa_bit_mode32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we forcibly remove MODE26 from
	     the isa features here (if it was set), so that the
	     search below will always be able to find a compatible
	     processor.  */
	  bitmap_clear_bit (default_isa, isa_bit_mode26);
	}

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const struct processors *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}
	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->name;
      arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
    }

  gcc_assert (arm_selected_cpu);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }
  else if (target->core_name == NULL)
    /* To support this we need to be able to parse FPU feature options
       from the architecture string.  */
    sorry ("-mfpu=auto not currently supported without an explicit CPU.");

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_cpu->arch;
  target->base_arch = arm_selected_cpu->base_arch;
  target->arch_core = arm_selected_cpu->core;

  target->tune_flags = arm_selected_tune->tune_flags;
  target->tune = arm_selected_tune->tune;
  target->tune_core = arm_selected_tune->core;
}
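/* Illustrative behaviour: "-mcpu=cortex-a8 -march=armv7-a" leaves no
   residual bits after the xor/mask steps above, so the two options are
   treated as consistent and the CPU setting is kept.  By contrast, a
   pairing such as "-mcpu=arm1176jzf-s -march=armv7-a" does differ, so
   (when WARN_COMPATIBLE) the conflict warning fires, armv7-a drives code
   generation and arm1176jzf-s is retained only for tuning.  */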
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;
      int fpu_index;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;
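  /* Illustrative result: for -march=armv7-a the arch_pp_name is "7A", so
     the sprintf above turns arm_arch_name into "__ARM_ARCH_7A__"; the
     PROFILE placeholder in the initializer merely sizes the buffer for
     the longest such name.  */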
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
  arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
  arm_arch5te = arm_arch5e
    && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
  arm_arch6kz
    = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
	    error ("-mfloat-abi=hard: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
3554 /* Hoisting PIC address calculations more aggressively provides a small,
3555 but measurable, size reduction for PIC code. Therefore, we decrease
3556 the bar for unrestricted expression hoisting to the cost of PIC address
3557 calculation, which is 2 instructions. */
3558 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3559 global_options
.x_param_values
,
3560 global_options_set
.x_param_values
);
3562 /* ARM EABI defaults to strict volatile bitfields. */
3563 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3564 && abi_version_at_least(2))
3565 flag_strict_volatile_bitfields
= 1;
3567 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3568 have deemed it beneficial (signified by setting
3569 prefetch.num_slots to 1 or more). */
3570 if (flag_prefetch_loop_arrays
< 0
3573 && current_tune
->prefetch
.num_slots
> 0)
3574 flag_prefetch_loop_arrays
= 1;
3576 /* Set up parameters to be used in prefetching algorithm. Do not
3577 override the defaults unless we are tuning for a core we have
3578 researched values for. */
3579 if (current_tune
->prefetch
.num_slots
> 0)
3580 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3581 current_tune
->prefetch
.num_slots
,
3582 global_options
.x_param_values
,
3583 global_options_set
.x_param_values
);
3584 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3585 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3586 current_tune
->prefetch
.l1_cache_line_size
,
3587 global_options
.x_param_values
,
3588 global_options_set
.x_param_values
);
3589 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3590 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3591 current_tune
->prefetch
.l1_cache_size
,
3592 global_options
.x_param_values
,
3593 global_options_set
.x_param_values
);
3595 /* Use Neon to perform 64-bits operations rather than core
3597 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3598 if (use_neon_for_64bits
== 1)
3599 prefer_neon_for_64bits
= true;
3601 /* Use the alternative scheduling-pressure algorithm by default. */
3602 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3603 global_options
.x_param_values
,
3604 global_options_set
.x_param_values
);
3606 /* Look through ready list and all of queue for instructions
3607 relevant for L2 auto-prefetcher. */
3608 int param_sched_autopref_queue_depth
;
3610 switch (current_tune
->sched_autopref
)
3612 case tune_params::SCHED_AUTOPREF_OFF
:
3613 param_sched_autopref_queue_depth
= -1;
3616 case tune_params::SCHED_AUTOPREF_RANK
:
3617 param_sched_autopref_queue_depth
= 0;
3620 case tune_params::SCHED_AUTOPREF_FULL
:
3621 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3628 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3629 param_sched_autopref_queue_depth
,
3630 global_options
.x_param_values
,
3631 global_options_set
.x_param_values
);
3633 /* Currently, for slow flash data, we just disable literal pools. We also
3634 disable it for pure-code. */
3635 if (target_slow_flash_data
|| target_pure_code
)
3636 arm_disable_literal_pool
= true;
3638 if (use_cmse
&& !arm_arch_cmse
)
3639 error ("target CPU does not support ARMv8-M Security Extensions");
3641 /* Disable scheduling fusion by default if it's not armv7 processor
3642 or doesn't prefer ldrd/strd. */
3643 if (flag_schedule_fusion
== 2
3644 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3645 flag_schedule_fusion
= 0;
3647 /* Need to remember initial options before they are overriden. */
3648 init_optimize
= build_optimization_node (&global_options
);
3650 arm_option_override_internal (&global_options
, &global_options_set
);
3651 arm_option_check_internal (&global_options
);
3652 arm_option_params_internal ();
3654 /* Create the default target_options structure. */
3655 target_option_default_node
= target_option_current_node
3656 = build_target_option_node (&global_options
);
3658 /* Register global variables with the garbage collector. */
3659 arm_add_gc_roots ();
3661 /* Init initial mode for testing. */
3662 thumb_flipper
= TARGET_THUMB
;
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg *ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
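/* Illustrative usage (not part of the original file): the table above is
   matched against declarations such as

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   for which arm_isr_value returns ARM_FT_ISR.  */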
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
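/* A worked example (illustrative, not from the original source): in ARM
   mode the assembled template is two loads followed by two zero words,

       ldr	r8, [pc, #0]	@ pc reads as .+8, so this loads word 3
       ldr	pc, [pc, #0]	@ likewise, this loads word 4
       .word	0		@ patched with the static chain value
       .word	0		@ patched with the target function address

   arm_trampoline_init below fills in the last two words at run time.  */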
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
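/* Illustrative note (not in the original source): trampolines are what
   make taking the address of a GNU C nested function work, e.g.

       int outer (int x)
       {
	 int inner (int y) { return x + y; }	// needs the static chain
	 return call_it (inner);		// address escapes -> trampoline
       }

   where call_it is a hypothetical function taking a function pointer; the
   stack trampoline built above loads the static chain register and jumps
   to inner.  */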
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry functions need to use bxns to return and thus
     need several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  return 1;
}
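/* For illustration (not in the original source): when use_return_insn
   returns 1 the entire epilogue can be a single instruction such as

       ldmfd	sp!, {r4, r5, pc}	@ restore callee-saves and return

   while any of the early exits above force a longer return sequence.  */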
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
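/* Worked examples (illustrative, not from the original source): 0x000000ff,
   0x0000ff00 and 0xff000000 are all valid ARM immediates (an 8-bit value
   rotated right by an even amount), as is 0xf000000f (0xff rotated right
   by 4).  0x00000101 is not: its set bits span nine bit positions, so it
   takes two instructions to synthesize.  */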
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
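/* An illustrative example (not in the original source): for PLUS with
   i = -255, const_ok_for_arm (i) fails but const_ok_for_arm (-i) succeeds,
   so (plus (reg) (const_int -255)) is accepted and can be emitted as

       sub	r0, r1, #255	@ r0 = r1 + (-255)  */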
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  if (TARGET_THUMB1)
    return 0;

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	     && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
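/* A worked example (illustrative, not from the original source): for a
   DImode AND with i = 0xFFFFFFFF00000F00, hi_val is 0xFFFFFFFF (the high
   word is left untouched) and lo_val 0x00000F00 is a valid rotated
   immediate, so the operation needs no constant loaded into registers.  */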
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c  */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
					 best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it
		 will leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode,
							    GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }
  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zeros.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn
		    (cond,
		     gen_ashrsi3 (target, new_src,
				  GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn
		    (cond,
		     gen_ashrsi3 (target, new_src,
				  GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}
      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source,
								   sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant (which is composed of set_sign_bit_copies of
			    leading 1s and the remainder 0s, e.g. 0xfff00000)
	  to
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of
			    trailing ones)
	  to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For e.g. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insns' worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int) swap_condition ((enum rtx_code) *code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
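/* A worked example (illustrative, not from the original source): for an
   SImode (x > 0xffff), 0xffff is not a valid ARM immediate but 0x10000 is,
   so the comparison is rewritten as (x >= 0x10000):

       cmp	r0, #65536	@ rather than loading 65535 first  */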
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}

static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
5496 or in a register (false). FNTYPE is the type of the function making
5499 arm_return_in_memory (const_tree type
, const_tree fntype
)
5503 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5505 if (TARGET_AAPCS_BASED
)
5507 /* Simple, non-aggregate types (ie not including vectors and
5508 complex) are always returned in a register (or registers).
5509 We don't care about which register here, so we can short-cut
5510 some of the detail. */
5511 if (!AGGREGATE_TYPE_P (type
)
5512 && TREE_CODE (type
) != VECTOR_TYPE
5513 && TREE_CODE (type
) != COMPLEX_TYPE
)
5516 /* Any return value that is no larger than one word can be
5518 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5521 /* Check any available co-processors to see if they accept the
5522 type as a register candidate (VFP, for example, can return
5523 some aggregates in consecutive registers). These aren't
5524 available if the call is variadic. */
5525 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5528 /* Vector values should be returned using ARM registers, not
5529 memory (unless they're over 16 bytes, which will break since
5530 we only have four call-clobbered registers to play with). */
5531 if (TREE_CODE (type
) == VECTOR_TYPE
)
5532 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5534 /* The rest go in memory. */
5538 if (TREE_CODE (type
) == VECTOR_TYPE
)
5539 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5541 if (!AGGREGATE_TYPE_P (type
) &&
5542 (TREE_CODE (type
) != VECTOR_TYPE
))
5543 /* All simple types are returned in registers. */
5546 if (arm_abi
!= ARM_ABI_APCS
)
5548 /* ATPCS and later return aggregate types in memory only if they are
5549 larger than a word (or are variable size). */
5550 return (size
< 0 || size
> UNITS_PER_WORD
);
5553 /* For the arm-wince targets we choose to be compatible with Microsoft's
5554 ARM and Thumb compilers, which always return aggregates in memory. */
5556 /* All structures/unions bigger than one word are returned in memory.
5557 Also catch the case where int_size_in_bytes returns -1. In this case
5558 the aggregate is either huge or of variable size, and in either case
5559 we will want to return it via memory and not in a register. */
5560 if (size
< 0 || size
> UNITS_PER_WORD
)
5563 if (TREE_CODE (type
) == RECORD_TYPE
)
5567 /* For a struct the APCS says that we only return in a register
5568 if the type is 'integer like' and every addressable element
5569 has an offset of zero. For practical purposes this means
5570 that the structure can have at most one non bit-field element
5571 and that this element must be the first one in the structure. */
5573 /* Find the first field, ignoring non FIELD_DECL things which will
5574 have been created by C++. */
5575 for (field
= TYPE_FIELDS (type
);
5576 field
&& TREE_CODE (field
) != FIELD_DECL
;
5577 field
= DECL_CHAIN (field
))
5581 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5583 /* Check that the first field is valid for returning in a register. */
5585 /* ... Floats are not allowed */
5586 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5589 /* ... Aggregates that are not themselves valid for returning in
5590 a register are not allowed. */
5591 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5594 /* Now check the remaining fields, if any. Only bitfields are allowed,
5595 since they are not addressable. */
5596 for (field
= DECL_CHAIN (field
);
5598 field
= DECL_CHAIN (field
))
5600 if (TREE_CODE (field
) != FIELD_DECL
)
5603 if (!DECL_BIT_FIELD_TYPE (field
))
5610 if (TREE_CODE (type
) == UNION_TYPE
)
5614 /* Unions can be returned in registers if every element is
5615 integral, or can be returned in an integer register. */
5616 for (field
= TYPE_FIELDS (type
);
5618 field
= DECL_CHAIN (field
))
5620 if (TREE_CODE (field
) != FIELD_DECL
)
5623 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5626 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5632 #endif /* not ARM_WINCE */
5634 /* Return all other types in memory. */
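/* Illustrative examples (not part of the original source): under the APCS
   rules above, struct { int x; } is 'integer like' and comes back in r0,
   while struct { float f; } and struct { int x, y; } go back in memory;
   under AAPCS the two-int struct also fails the one-word test.  */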
static const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS type.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		    const_tree fntype  ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}

/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
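
/* For example, given

     struct hfa { float x, y, z; };

   the walk above finds three consecutive SFmode elements and no padding,
   so *MODEP becomes SFmode and the count is 3 (a homogeneous aggregate).
   Adding an int member, or introducing a packing gap, makes the element
   check or the padding check fail and the type is rejected with -1.  */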

/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}

/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
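
/* So, for instance, a libcall has no type information and a _Complex
   double argument is classified purely from MODE_COMPLEX_FLOAT as two
   DFmode elements, while aggregates always arrive with a TYPE and are
   classified by aapcs_vfp_sub_candidate above instead.  */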

static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}

/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
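
/* Allocation example: for a homogeneous aggregate of two doubles,
   SHIFT is 2 and RCOUNT is 2, so MASK is 0xf and the loop above scans
   s0, s2, s4, ... until it finds four consecutive free single-precision
   registers, typically allocating d0-d1 (s0-s3) on the first call.  */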

/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}

      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}

#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
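
/* Only the VFP entry is defined here, so in practice
   ARM_NUM_COPROC_SLOTS is 1 and slot 0 below always refers to the VFP
   rules; iWMMXt arguments are handled by the legacy (non-AAPCS) code
   paths instead of this table.  */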

static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}

static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}

/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
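
/* Worked example of the rules above: for f (int a, long long b), A
   occupies r0 and NCRN is 1 when B is laid out; C3 rounds NCRN up to 2
   because long long is doubleword-aligned, and C4 then assigns r2/r3,
   so nothing spills to the stack (r1 is simply skipped).  */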

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}

/* Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }

  return ret;
}
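
/* For example, int and float (32-bit alignment) never exceed
   PARM_BOUNDARY here, whereas long long, double and any struct with a
   64-bit aligned FIELD_DECL return 1 and trigger the even-register
   rounding in the AAPCS layout code above.  */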

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (mode, type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	pcum->nregs++;
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}

static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}

static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}

/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}

/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
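
/* For example, a GNU C structure whose size depends on a runtime bound
   (say, one ending in a variable-length array member) has a
   non-INTEGER_CST TYPE_SIZE and is therefore passed by invisible
   reference; fixed-size aggregates, however large, are still copied.  */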

/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}

/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}

/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif

/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;
      machine_mode arg_mode = TYPE_MODE (arg_type);

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      if (!first_param)
	arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
      if (!arg_rtx
	  || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}
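
/* For instance, a cmse_nonsecure_entry function taking five int
   arguments fails the check above: r0-r3 hold the first four and the
   fifth would have to be passed on the stack, which the
   secure/non-secure transition cannot allow.  */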

/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
	       name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}

/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
	       name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}

/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}

/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}

/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__((long call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
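
/* For example:

     extern void g (void) __attribute__ ((long_call));

   forces a 32-bit call sequence for calls to g even without
   -mlong-calls, unless g is known to end up in the caller's own
   section (case "f" above), where the short form is always safe.  */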

/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
	}

      if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}

/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}

/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx_insn *insn;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  rtx pat;
	  rtx mem;

	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register ();

	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

	  /* Make the MEM as close to a constant as possible.  */
	  mem = SET_SRC (pat);
	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
	  MEM_READONLY_P (mem) = 1;
	  MEM_NOTRAP_P (mem) = 1;

	  insn = emit_insn (pat);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
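
/* A sketch of the code produced by the GOT path above for a global
   symbol in ARM state:

       ldr     r3, .Lgotoff        @ offset of sym's GOT slot
       ldr     r3, [rPIC, r3]     @ load sym's address from the GOT

   Locally-binding symbols instead take the cheaper pc-relative
   arm_pic_static_addr route and never touch the GOT.  */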

/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
	  ? crtl->args.info.aapcs_ncrn < 4
	  : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}

static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	{
	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
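
/* On ARM the common (non-VxWorks) case above emits, in effect:

       ldr     rPIC, .LPICoff
     .LPICn:
       add     rPIC, pc, rPIC

   with .LPICoff holding _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8), the +8
   compensating for the PC read-ahead noted in the comment above.  */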

/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
			       UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
}

/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static bool
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}

/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}

/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
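
/* Illustrative usage (not from the original source): an address such as
   (plus (reg rN) (mult (reg rM) (const_int 4))) passes this check and can
   be emitted as the scaled-index form [rN, rM, lsl #2].  */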

/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* ??? Can we assume ldrd for thumb2?  */
	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
	  /* ldrd supports offsets of +-1020.
	     However the ldr fallback does not.  */
	  return val > -256 && val < 256 && (val & 3) == 0;
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}

/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}

/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
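
/* Illustrative examples (not from the original source) of addresses the
   function above accepts: (reg r3); (plus (reg r2) (reg r5)) for SImode;
   (plus (reg sp) (const_int 1020)) for SImode; and a constant-pool
   SYMBOL_REF before arm_reorg runs when not generating PIC code.  */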

/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
bool
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
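
/* Worked examples (illustrative, not from the original source): QImode
   accepts offsets 0..31; HImode accepts even offsets 0..62; SImode
   accepts multiples of 4 up to 124 (124 + 4 <= 128).  These match the
   5-bit scaled immediate fields of the 16-bit Thumb load/store
   encodings.  */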
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}

static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}

static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}

static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}

rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode,
				   gen_rtvec (2, x,
					      GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      gcc_unreachable ();
    }
}
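
/* Illustrative summary (not from the original source): for a variable
   declared __thread, the global- and local-dynamic models above emit a
   call to __tls_get_addr (or a GNU2 TLS descriptor sequence), the
   initial-exec model loads the variable's offset from the GOT and adds
   it to the thread pointer, and the local-exec model adds a link-time
   constant offset directly to the value returned by arm_load_tp.  */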

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only
	 and hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }
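
  /* Worked example (illustrative, not from the original source): for
     SImode and x == 0x12345678, bits == 12, so base == 0x12345000 and
     index == 0x678; the base is materialized once in a register and the
     access becomes [base_reg, #0x678], letting neighbouring references
     share base_reg.  */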

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}

/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference; forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}

/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool XXX.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))

static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}

/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4byte long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}

/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
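
/* Illustrative example (not from the original source): for the operand
   (mult:SI (reg:SI r1) (const_int 4)), shifter_op_p returns (reg:SI r1)
   and leaves *SHIFT_REG untouched, because a multiply by 4 is a left
   shift by a constant 2 and can be folded into the shifter operand of an
   ALU instruction.  */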

static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}

/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
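
/* Worked example (illustrative, not from the original source): for a
   two-argument libcall, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20)
   when optimizing for speed and COSTS_N_INSNS (4) when optimizing for
   size.  */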

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
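
/* Illustrative example (not from the original source): for a narrow-mode
   RTX such as (plus:HI (ashift:HI (reg:HI a) (const_int 2)) (reg:HI b)),
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) adds the cost of an arith_shift
   (or arith_shift_reg for a register shift amount) plus the costs of the
   shifted operand and the other addend, then returns true.  */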

/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;

    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;

    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				   + ldm_regs_per_insn_subsequent - 1)
				  / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */
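
      /* Worked example (illustrative, not from the original source): with
	 ldm_regs_per_insn_1st == 1 and ldm_regs_per_insn_subsequent == 2,
	 an LDM of N == 5 registers costs
	 ldm_1st + COSTS_N_INSNS ((4 + 2 - 1) / 2)
	 = ldm_1st + COSTS_N_INSNS (2).  */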

      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
			      ? extra_cost->ldst.ldm_regs_per_insn_1st
			      : extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			       : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	    }
	  return true;
	}
      return false;
    }

    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall-through.  */
    case UMOD:
      *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */
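
    /* Worked example for the power-of-2 expansion above (illustrative,
       not from the original source): for x % 8 with x == -13, RSBS sets
       r1 = 13 (a positive result, so the PL condition holds), the two
       ANDs leave r0 = 3 and r1 = 5, and RSBPL then overwrites r0 with
       -5, matching C's truncating division semantics (-13 % 8 == -5).  */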

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;

    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }
	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
      if (outer_code == SET)

	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)

	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);

	  if (XEXP (x, 1) == const0_rtx)

	      /* LSR Rd, Rn, #31.  */
	      *cost += extra_cost->alu.shift;

	      *cost += COSTS_N_INSNS (1);

	      /* RSBS T1, Rn, Rn, LSR #31
		 ...  */
	      *cost += COSTS_N_INSNS (1);
	      *cost += extra_cost->alu.arith_shift;

	      /* RSB  Rd, Rn, Rn, ASR #1
		 LSR  Rd, Rd, #31.  */
	      *cost += COSTS_N_INSNS (1);
	      *cost += (extra_cost->alu.arith_shift
			+ extra_cost->alu.shift);

	      *cost += COSTS_N_INSNS (1);
	      *cost += extra_cost->alu.shift;

	      /* Remaining cases are either meaningless or would take
		 three insns anyway.  */
	      *cost = COSTS_N_INSNS (3);

	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

	  *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))

	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))

	  *cost += extra_cost->fp[mode != SFmode].neg;

      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)

	  *cost = LIBCALL_COST (1);

      if (mode == SImode)

	  *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;

      *cost = LIBCALL_COST (1);
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;

	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)

	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += extra_cost->alu.extend;

      else if (GET_MODE (XEXP (x, 0)) != SImode)

	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += 2 * extra_cost->alu.shift;

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)

	  *cost += COSTS_N_INSNS (1);
	  *cost += extra_cost->alu.shift;
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))

	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)

	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  *cost += extra_cost->alu.logical;

      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)

	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += extra_cost->alu.extend;

      else if (GET_MODE (XEXP (x, 0)) != SImode)

	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += 2 * extra_cost->alu.shift;

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))

	  /* Avoid blowing up in arm_gen_constant ().  */
	  if (!(outer_code == PLUS
		|| outer_code == AND
		|| outer_code == IOR
		|| outer_code == XOR
		|| outer_code == MINUS))

      if (mode == SImode)

	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));

	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));

      if (arm_arch_thumb2 && !flag_pic)
	*cost += COSTS_N_INSNS (1);

	*cost += extra_cost->ldst.load;

      *cost += COSTS_N_INSNS (1);

      *cost += COSTS_N_INSNS (1);
      *cost += extra_cost->alu.arith;

      *cost = COSTS_N_INSNS (4);
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))

	  if (vfp3_const_double_rtx (x))

	      *cost += extra_cost->fp[mode == DFmode].fpconst;

	  if (mode == DFmode)
	    *cost += extra_cost->ldst.loadd;

	    *cost += extra_cost->ldst.loadf;

	  *cost += COSTS_N_INSNS (1 + (mode == DFmode));

      *cost = COSTS_N_INSNS (4);

      if (TARGET_NEON && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);

	*cost = COSTS_N_INSNS (4);

      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */

      *cost += extra_cost->alu.clz;
      if (XEXP (x, 1) == const0_rtx)

	  *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

      /* Fall through.  */

      *cost += COSTS_N_INSNS (1);

      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))

	  *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));

      *cost = LIBCALL_COST (1);

    case UNSPEC_VOLATILE:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */

      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))

	  *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

	  /* Without UBFX/SBFX, need to resort to shift operations.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += 2 * extra_cost->alu.shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      if (TARGET_HARD_FLOAT)

	  *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_FPU_ARMV8
	      && GET_MODE (XEXP (x, 0)) == HFmode)

	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      *cost += extra_cost->fp[0].widen;

	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

      *cost = LIBCALL_COST (1);

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)

	  *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  /* Vector modes?  */

      *cost = LIBCALL_COST (1);
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)

	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  *cost += extra_cost->fp[mode == DFmode].fma;

      *cost = LIBCALL_COST (3);
      if (TARGET_HARD_FLOAT)

	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)

	      *cost += extra_cost->fp[0].toint;
	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);

	  if (GET_MODE_CLASS (mode) == MODE_INT)

	      mode = GET_MODE (XEXP (x, 0));
	      *cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);

		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */

	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_FPU_ARMV8)

	      *cost += extra_cost->fp[mode == DFmode].roundint;

	  /* Vector costs?  */

      *cost = LIBCALL_COST (1);
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)

	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  *cost += extra_cost->fp[mode == DFmode].fromint;

      *cost = LIBCALL_COST (1);

	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
	*cost = COSTS_N_INSNS (asm_length + num_operands);

      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

	*cost = COSTS_N_INSNS (4); /* Who knows?  */
10841 #undef HANDLE_NARROW_SHIFT_ARITH
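/* Editor's note: a standalone sketch (guarded out, not part of this file) of
   the cost units the code above accumulates.  It assumes the standard rtl.h
   definition COSTS_N_INSNS (N) == (N) * 4, so "one instruction" costs 4
   units and the extra_cost tables add fractions of an insn on top.  */
#if 0
#include <stdio.h>

#define COSTS_N_INSNS(N) ((N) * 4)	/* As defined in GCC's rtl.h.  */

int
main (void)
{
  int cost = COSTS_N_INSNS (1);	/* Base cost: one insn.  */
  cost += COSTS_N_INSNS (1);	/* E.g. the extra insn of a DImode compare.  */
  printf ("cost = %d units (= %d insns)\n", cost, cost / 4);
  return 0;
}
#endif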
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)

  int code = GET_CODE (x);

  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))

      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */

static int
arm_arm_address_cost (rtx x)

  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)

  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)

      if (CONST_INT_P (XEXP (x, 1)))

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))

static int
arm_thumb_address_cost (rtx x)

  enum rtx_code c = GET_CODE (x);

  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)

  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* Adjust cost hook for XScale.  */

static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int * cost)

  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)

      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))

	  rtx shifted_operand;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)

	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
/* Adjust cost hook for Cortex A9.  */

static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int * cost)

    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)

	  if (GET_CODE (PATTERN (insn)) == SET)

	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)

		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     ...
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))

		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))

			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;

			    *cost = insn_default_latency (dep);

			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;

			    *cost = insn_default_latency (dep);

      gcc_unreachable ();
/* Adjust cost hook for FA726TE.  */

static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int * cost)

  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)

      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)

  if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
      || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))

  else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	   || (from != IWMMXT_REGS && to == IWMMXT_REGS))

  else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)

  if (from == HI_REGS || to == HI_REGS)

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)

      if (GET_MODE_SIZE (mode) < 4)

	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
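/* Editor's note: a standalone sketch (guarded out) of the Thumb-1 memory
   move formula above: two cost units per word of mode size, doubled again
   outside LO_REGS.  The byte sizes are illustrative stand-ins for
   GET_MODE_SIZE; the small-mode (< 4 bytes) case is handled separately
   above.  */
#if 0
static int
thumb1_memory_move_cost_sketch (int mode_size_bytes, int is_lo_regs)
{
  /* Mirrors: (2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2).  */
  return (2 * mode_size_bytes) * (is_lo_regs ? 1 : 2);
}
/* E.g. SImode in LO_REGS: sketch (4, 1) == 8; DImode elsewhere:
   sketch (8, 0) == 32.  */
#endif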
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)

  switch (type_of_cost)

	return current_tune->vec_costs->scalar_stmt_cost;

	return current_tune->vec_costs->scalar_load_cost;

	return current_tune->vec_costs->scalar_store_cost;

	return current_tune->vec_costs->vec_stmt_cost;

	return current_tune->vec_costs->vec_align_load_cost;

	return current_tune->vec_costs->vec_store_cost;

      case vec_to_scalar:
	return current_tune->vec_costs->vec_to_scalar_cost;

      case scalar_to_vec:
	return current_tune->vec_costs->scalar_to_vec_cost;

      case unaligned_load:
	return current_tune->vec_costs->vec_unalign_load_cost;

      case unaligned_store:
	return current_tune->vec_costs->vec_unalign_store_cost;

      case cond_branch_taken:
	return current_tune->vec_costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return current_tune->vec_costs->cond_not_taken_branch_cost;

      case vec_promote_demote:
	return current_tune->vec_costs->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

	gcc_unreachable ();
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)

  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)

      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
/* Return true if and only if this insn can dual-issue only as older.  */

static bool
cortexa7_older_only (rtx_insn *insn)

  if (recog_memoized (insn) < 0)

  switch (get_attr_type (insn))

    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:

/* Return true if and only if this insn can dual-issue as younger.  */

static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)

  if (recog_memoized (insn) < 0)

      fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));

  switch (get_attr_type (insn))

    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */

static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)

  int first_older_only = -1, first_younger = -1;

	     ";; sched_reorder for cycle %d with %d insns in ready list\n",

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)

      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))

	  first_older_only = i;

	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));

      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)

	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)

	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");

  /* Move first_older_only insn before first_younger.  */

    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID(ready[first_older_only]),
	     INSN_UID(ready[first_younger]));

  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)

      ready[i] = ready[i+1];

  ready[i] = first_older_only_insn;
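/* Editor's note: a standalone sketch (guarded out) of the rotation above.
   The ready list issues from the back (highest index first); sliding the
   elements between the two positions down one slot and dropping the
   older-only insn into the freed slot makes it issue before the younger
   insn while keeping everything else in relative order.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Index 4 issues first.  'O' is older-only, 'y' can issue as younger.  */
  char ready[5] = { 'a', 'b', 'O', 'y', 'z' };
  int first_older_only = 2, first_younger = 3, i;

  char first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[i] = first_older_only_insn;

  for (i = 0; i < 5; i++)
    putchar (ready[i]);	/* Prints "abyOz": 'O' now issues before 'y'.  */
  putchar ('\n');
  return 0;
}
#endif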
/* Implement TARGET_SCHED_REORDER.  */

static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)

    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);

      /* Do nothing for other cores.  */

  return arm_issue_rate ();
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */

static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)

  if (current_tune->sched_adjust_cost != NULL)

      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)

  /* Call insns don't incur a stall, even if they follow a load.  */

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))

      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))

static int
arm_max_conditional_execute (void)

  return max_insns_skipped;
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)

    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)

  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)

  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)

  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)

  r = REAL_VALUE_ATOF ("0", DFmode);

  fp_consts_inited = true;

/* Return TRUE if rtx X is a valid immediate FP constant.  */

int
arm_const_double_rtx (rtx x)

  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))

  if (real_equal (r, &value_fp0))
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using this
   formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^(-r)

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */

static int
vfp3_const_double_index (rtx x)

  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */

int
vfp3_const_double_rtx (rtx x)

  return vfp3_const_double_index (x) != -1;
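/* Editor's note: a standalone decoder (guarded out) for the index produced
   above, inverting (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16).
   With n = 16 + (low nibble) and r = (bits 6:4) ^ 3, the encoded value is
   (-1)^sign * n * 2^(-r); e.g. index 0x70 decodes to 1.0.  This is the
   editor's reading of the code above, not a function used by GCC.  */
#if 0
#include <stdio.h>

static double
vfp3_decode_index_sketch (unsigned char index)
{
  int sign = (index >> 7) & 1;
  int r = ((index >> 4) & 7) ^ 3;	/* Undo the XOR-3 exponent encoding.  */
  int n = 16 + (index & 0xf);		/* Mantissa is stored as n - 16.  */
  double v = (double) n;
  while (r-- > 0)
    v /= 2.0;
  return sign ? -v : v;
}

int
main (void)
{
  printf ("%g %g\n", vfp3_decode_index_sketch (0x70),	/* 1.0 */
	  vfp3_decode_index_sketch (0x00));		/* 2.0 */
  return 0;
}
#endif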
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
		      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)

#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      {						\
	matches = 0;				\
	break;					\
      }						\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

    n_elts = CONST_VECTOR_NUNITS (op);

      if (mode == VOIDmode)

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)

      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))

	*modconst = CONST_VECTOR_ELT (op, 0);

      if (el0 == CONST0_RTX (GET_MODE (el0)))

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)

      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)

	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));
  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (1, 8, 16, bytes[i] == bytes[0]);

  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
		    && bytes[i] == bytes[(i + 8) % idx]);

    *elementwidth = elsize;

      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);

	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
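/* Editor's note: a standalone sketch (guarded out) of the byte-splat test
   driven by the CHECK macro above.  A V2SI vector of 0x000000ab expands to
   bytes { ab 00 00 00 ab 00 00 00 } (little-endian), which matches variant
   0: every stride-4 group is { bytes[0], 0, 0, 0 }.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned char bytes[8];
  unsigned int elt = 0x000000ab, idx = 8, i;

  for (i = 0; i < idx; i++)		/* Splat out into a byte vector.  */
    bytes[i] = (elt >> ((i % 4) * 8)) & 0xff;

  int matches = 1;			/* Variant 0 test, stride 4.  */
  for (i = 0; i < idx; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
	  && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      matches = 0;
  printf ("variant 0 %s\n", matches ? "matches" : "does not match");
  return 0;
}
#endif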
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)

  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

    *modconst = tmpconst;

    *elementwidth = tmpwidth;

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)

  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)

    *modconst = tmpconst;

    *elementwidth = tmpwidth;
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT determines whether this is a left or right
   shift, because the two have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)

  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)

      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))

	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)

  /* Shift less than element size.  */
  maxshift = innersize * 8;

      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)

      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)

    *elementwidth = innersize * 8;

    *modconst = CONST_VECTOR_ELT (op, 0);
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)

  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);

    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)

  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);

    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))

  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);

  for (i = parts / 2; i >= 1; i /= 2)

      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
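/* Editor's note: a standalone scalar model (guarded out) of the halving loop
   above: each pairwise step combines adjacent lanes, so reducing PARTS lanes
   takes log2(PARTS) steps rather than PARTS - 1 serial additions.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int lanes[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }, parts = 8, i, n;

  for (n = parts / 2; n >= 1; n /= 2)	/* Mirrors "for (i = parts / 2; ...".  */
    for (i = 0; i < n; i++)
      lanes[i] = lanes[2 * i] + lanes[2 * i + 1];	/* vpadd-style step.  */

  printf ("sum = %d\n", lanes[0]);	/* Prints 36.  */
  return 0;
}
#endif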
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)

  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)

  machine_mode mode = GET_MODE (vals);
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);

  if (GET_CODE (vals) == CONST_VECTOR)

  else if (GET_CODE (vals) == PARALLEL)

      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)

	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))

      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));

    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */

  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */

  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */

    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)

  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;

  for (i = 0; i < n_elts; ++i)

      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))

      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)

	  emit_move_insn (target, constant);

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)

      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */

      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));

	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));

	  gcc_unreachable ();

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)

  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)

	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);

	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)

  bounds_check (operand, low, high, exp, "lane");

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)

  bounds_check (operand, low, high, NULL_TREE, "constant");

HOST_WIDE_INT
neon_element_bits (machine_mode mode)

  return GET_MODE_UNIT_BITSIZE (mode);
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */

    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)  */

int
neon_vector_mem_operand (rtx op, int type, bool strict)

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */

    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))

  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */

int
neon_struct_mem_operand (rtx op)

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */

    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
/* Return true if X is a register that will be eliminated later on.  */

int
arm_eliminable_register (rtx x)

  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));

/* Return GENERAL_REGS if a scratch register required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)

  if (mode == HFmode)

      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))

      return GENERAL_REGS;

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))

  return GENERAL_REGS;

/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)

  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
/* Return TRUE if X references a SYMBOL_REF.  */

int
symbol_mentioned_p (rtx x)

  if (GET_CODE (x) == SYMBOL_REF)

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))

      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))

/* Return TRUE if X references a LABEL_REF.  */

int
label_mentioned_p (rtx x)

  if (GET_CODE (x) == LABEL_REF)

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))

      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))

int
tls_mentioned_p (rtx x)

  switch (GET_CODE (x))

      return tls_mentioned_p (XEXP (x, 0));

      if (XINT (x, 1) == UNSPEC_TLS)

      /* Fall through.  */
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)

  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)

      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))

  rtx set = single_set (insn);

      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))

enum rtx_code
minmax_code (rtx x)

  enum rtx_code code = GET_CODE (x);

      gcc_unreachable ();
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

static bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)

  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)

      *signed_sat = false;

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)

      *signed_sat = true;
12696 /* Return 1 if memory locations are adjacent. */
12698 adjacent_mem_locations (rtx a
, rtx b
)
12700 /* We don't guarantee to preserve the order of these memory refs. */
12701 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12704 if ((REG_P (XEXP (a
, 0))
12705 || (GET_CODE (XEXP (a
, 0)) == PLUS
12706 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12707 && (REG_P (XEXP (b
, 0))
12708 || (GET_CODE (XEXP (b
, 0)) == PLUS
12709 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12711 HOST_WIDE_INT val0
= 0, val1
= 0;
12715 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12717 reg0
= XEXP (XEXP (a
, 0), 0);
12718 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12721 reg0
= XEXP (a
, 0);
12723 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12725 reg1
= XEXP (XEXP (b
, 0), 0);
12726 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12729 reg1
= XEXP (b
, 0);
12731 /* Don't accept any offset that will require multiple
12732 instructions to handle, since this would cause the
12733 arith_adjacentmem pattern to output an overlong sequence. */
12734 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12737 /* Don't allow an eliminable register: register elimination can make
12738 the offset too large. */
12739 if (arm_eliminable_register (reg0
))
12742 val_diff
= val1
- val0
;
12746 /* If the target has load delay slots, then there's no benefit
12747 to using an ldm instruction unless the offset is zero and
12748 we are optimizing for size. */
12749 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12750 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12751 && (val_diff
== 4 || val_diff
== -4));
12754 return ((REGNO (reg0
) == REGNO (reg1
))
12755 && (val_diff
== 4 || val_diff
== -4));
12761 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12762 for load operations, false for store operations. CONSECUTIVE is true
12763 if the register numbers in the operation must be consecutive in the register
12764 bank. RETURN_PC is true if value is to be loaded in PC.
12765 The pattern we are trying to match for load is:
12766 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12767 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12770 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12773 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12774 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12775 3. If consecutive is TRUE, then for kth register being loaded,
12776 REGNO (R_dk) = REGNO (R_d0) + k.
12777 The pattern for store is similar. */
12779 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12780 bool consecutive
, bool return_pc
)
12782 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12783 rtx reg
, mem
, addr
;
12785 unsigned first_regno
;
12786 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12788 bool addr_reg_in_reglist
= false;
12789 bool update
= false;
12794 /* If not in SImode, then registers must be consecutive
12795 (e.g., VLDM instructions for DFmode). */
12796 gcc_assert ((mode
== SImode
) || consecutive
);
12797 /* Setting return_pc for stores is illegal. */
12798 gcc_assert (!return_pc
|| load
);
12800 /* Set up the increments and the regs per val based on the mode. */
12801 reg_increment
= GET_MODE_SIZE (mode
);
12802 regs_per_val
= reg_increment
/ 4;
12803 offset_adj
= return_pc
? 1 : 0;
12806 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12807 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12810 /* Check if this is a write-back. */
12811 elt
= XVECEXP (op
, 0, offset_adj
);
12812 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12818 /* The offset adjustment must be the number of registers being
12819 popped times the size of a single register. */
12820 if (!REG_P (SET_DEST (elt
))
12821 || !REG_P (XEXP (SET_SRC (elt
), 0))
12822 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12823 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12824 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12825 ((count
- 1 - offset_adj
) * reg_increment
))
12829 i
= i
+ offset_adj
;
12830 base
= base
+ offset_adj
;
12831 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12832 success depends on the type: VLDM can do just one reg,
12833 LDM must do at least two. */
12834 if ((count
<= i
) && (mode
== SImode
))
12837 elt
= XVECEXP (op
, 0, i
- 1);
12838 if (GET_CODE (elt
) != SET
)
12843 reg
= SET_DEST (elt
);
12844 mem
= SET_SRC (elt
);
12848 reg
= SET_SRC (elt
);
12849 mem
= SET_DEST (elt
);
12852 if (!REG_P (reg
) || !MEM_P (mem
))
12855 regno
= REGNO (reg
);
12856 first_regno
= regno
;
12857 addr
= XEXP (mem
, 0);
12858 if (GET_CODE (addr
) == PLUS
)
12860 if (!CONST_INT_P (XEXP (addr
, 1)))
12863 offset
= INTVAL (XEXP (addr
, 1));
12864 addr
= XEXP (addr
, 0);
12870 /* Don't allow SP to be loaded unless it is also the base register. It
12871 guarantees that SP is reset correctly when an LDM instruction
12872 is interrupted. Otherwise, we might end up with a corrupt stack. */
12873 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12876 for (; i
< count
; i
++)
12878 elt
= XVECEXP (op
, 0, i
);
12879 if (GET_CODE (elt
) != SET
)
12884 reg
= SET_DEST (elt
);
12885 mem
= SET_SRC (elt
);
12889 reg
= SET_SRC (elt
);
12890 mem
= SET_DEST (elt
);
12894 || GET_MODE (reg
) != mode
12895 || REGNO (reg
) <= regno
12898 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12899 /* Don't allow SP to be loaded unless it is also the base register. It
12900 guarantees that SP is reset correctly when an LDM instruction
12901 is interrupted. Otherwise, we might end up with a corrupt stack. */
12902 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12904 || GET_MODE (mem
) != mode
12905 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12906 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12907 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12908 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12909 offset
+ (i
- base
) * reg_increment
))
12910 && (!REG_P (XEXP (mem
, 0))
12911 || offset
+ (i
- base
) * reg_increment
!= 0)))
12914 regno
= REGNO (reg
);
12915 if (regno
== REGNO (addr
))
12916 addr_reg_in_reglist
= true;
12921 if (update
&& addr_reg_in_reglist
)
12924 /* For Thumb-1, address register is always modified - either by write-back
12925 or by explicit load. If the pattern does not describe an update,
12926 then the address register must be in the list of loaded registers. */
12928 return update
|| addr_reg_in_reglist
;
12934 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12935 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12936 instruction. ADD_OFFSET is nonzero if the base address register needs
12937 to be modified with an add instruction before we can use it. */
12940 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12941 int nops
, HOST_WIDE_INT add_offset
)
12943 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12944 if the offset isn't small enough. The reason 2 ldrs are faster
12945 is because these ARMs are able to do more than one cache access
12946 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12947 whilst the ARM8 has a double bandwidth cache. This means that
12948 these cores can do both an instruction fetch and a data fetch in
12949 a single cycle, so the trick of calculating the address into a
12950 scratch register (one of the result regs) and then doing a load
12951 multiple actually becomes slower (and no smaller in code size).
12952 That is the transformation
12954 ldr rd1, [rbase + offset]
12955 ldr rd2, [rbase + offset + 4]
12959 add rd1, rbase, offset
12960 ldmia rd1, {rd1, rd2}
12962 produces worse code -- '3 cycles + any stalls on rd2' instead of
12963 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12964 access per cycle, the first sequence could never complete in less
12965 than 6 cycles, whereas the ldm sequence would only take 5 and
12966 would make better use of sequential accesses if not hitting the
12969 We cheat here and test 'arm_ld_sched' which we currently know to
12970 only be true for the ARM8, ARM9 and StrongARM. If this ever
12971 changes, then the test below needs to be reworked. */
12972 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
12975 /* XScale has load-store double instructions, but they have stricter
12976 alignment requirements than load-store multiple, so we cannot
12979 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12980 the pipeline until completion.
12988 An ldr instruction takes 1-3 cycles, but does not block the
12997 Best case ldr will always win. However, the more ldr instructions
12998 we issue, the less likely we are to be able to schedule them well.
12999 Using ldr instructions also increases code size.
13001 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13002 for counts of 3 or 4 regs. */
13003 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13008 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13009 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13010 an array ORDER which describes the sequence to use when accessing the
13011 offsets that produces an ascending order. In this sequence, each
13012 offset must be larger by exactly 4 than the previous one. ORDER[0]
13013 must have been filled in with the lowest offset by the caller.
13014 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13015 we use to verify that ORDER produces an ascending order of registers.
13016 Return true if it was possible to construct such an order, false if
13020 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13021 int *unsorted_regs
)
13024 for (i
= 1; i
< nops
; i
++)
13028 order
[i
] = order
[i
- 1];
13029 for (j
= 0; j
< nops
; j
++)
13030 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13032 /* We must find exactly one offset that is higher than the
13033 previous one by 4. */
13034 if (order
[i
] != order
[i
- 1])
13038 if (order
[i
] == order
[i
- 1])
13040 /* The register numbers must be ascending. */
13041 if (unsorted_regs
!= NULL
13042 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13048 /* Used to determine in a peephole whether a sequence of load
13049 instructions can be changed into a load-multiple instruction.
13050 NOPS is the number of separate load instructions we are examining. The
13051 first NOPS entries in OPERANDS are the destination registers, the
13052 next NOPS entries are memory operands. If this function is
13053 successful, *BASE is set to the common base register of the memory
13054 accesses; *LOAD_OFFSET is set to the first memory location's offset
13055 from that base register.
13056 REGS is an array filled in with the destination register numbers.
13057 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13058 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13059 the sequence of registers in REGS matches the loads from ascending memory
13060 locations, and the function verifies that the register numbers are
13061 themselves ascending. If CHECK_REGS is false, the register numbers
13062 are stored in the order they are found in the operands. */
13064 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13065 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13067 int unsorted_regs
[MAX_LDM_STM_OPS
];
13068 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13069 int order
[MAX_LDM_STM_OPS
];
13070 rtx base_reg_rtx
= NULL
;
13074 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13075 easily extended if required. */
13076 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13078 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13080 /* Loop over the operands and check that the memory references are
13081 suitable (i.e. immediate offsets from the same base register). At
13082 the same time, extract the target register, and the memory
13084 for (i
= 0; i
< nops
; i
++)
13089 /* Convert a subreg of a mem into the mem itself. */
13090 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13091 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13093 gcc_assert (MEM_P (operands
[nops
+ i
]));
13095 /* Don't reorder volatile memory references; it doesn't seem worth
13096 looking for the case where the order is ok anyway. */
13097 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13100 offset
= const0_rtx
;
13102 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13103 || (GET_CODE (reg
) == SUBREG
13104 && REG_P (reg
= SUBREG_REG (reg
))))
13105 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13106 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13107 || (GET_CODE (reg
) == SUBREG
13108 && REG_P (reg
= SUBREG_REG (reg
))))
13109 && (CONST_INT_P (offset
13110 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13114 base_reg
= REGNO (reg
);
13115 base_reg_rtx
= reg
;
13116 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13119 else if (base_reg
!= (int) REGNO (reg
))
13120 /* Not addressed from the same base register. */
13123 unsorted_regs
[i
] = (REG_P (operands
[i
])
13124 ? REGNO (operands
[i
])
13125 : REGNO (SUBREG_REG (operands
[i
])));
13127 /* If it isn't an integer register, or if it overwrites the
13128 base register but isn't the last insn in the list, then
13129 we can't do this. */
13130 if (unsorted_regs
[i
] < 0
13131 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13132 || unsorted_regs
[i
] > 14
13133 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13136 /* Don't allow SP to be loaded unless it is also the base
13137 register. It guarantees that SP is reset correctly when
13138 an LDM instruction is interrupted. Otherwise, we might
13139 end up with a corrupt stack. */
13140 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13143 unsorted_offsets
[i
] = INTVAL (offset
);
13144 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13148 /* Not a suitable memory address. */
13152 /* All the useful information has now been extracted from the
13153 operands into unsorted_regs and unsorted_offsets; additionally,
13154 order[0] has been set to the lowest offset in the list. Sort
13155 the offsets into order, verifying that they are adjacent, and
13156 check that the register numbers are ascending. */
13157 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13158 check_regs
? unsorted_regs
: NULL
))
13162 memcpy (saved_order
, order
, sizeof order
);
13168 for (i
= 0; i
< nops
; i
++)
13169 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13171 *load_offset
= unsorted_offsets
[order
[0]];
13175 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13178 if (unsorted_offsets
[order
[0]] == 0)
13179 ldm_case
= 1; /* ldmia */
13180 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13181 ldm_case
= 2; /* ldmib */
13182 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13183 ldm_case
= 3; /* ldmda */
13184 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13185 ldm_case
= 4; /* ldmdb */
13186 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13187 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13192 if (!multiple_operation_profitable_p (false, nops
,
13194 ? unsorted_offsets
[order
[0]] : 0))
13200 /* Used to determine in a peephole whether a sequence of store instructions can
13201 be changed into a store-multiple instruction.
13202 NOPS is the number of separate store instructions we are examining.
13203 NOPS_TOTAL is the total number of instructions recognized by the peephole
13205 The first NOPS entries in OPERANDS are the source registers, the next
13206 NOPS entries are memory operands. If this function is successful, *BASE is
13207 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13208 to the first memory location's offset from that base register. REGS is an
13209 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13210 likewise filled with the corresponding rtx's.
13211 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13212 numbers to an ascending order of stores.
13213 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13214 from ascending memory locations, and the function verifies that the register
13215 numbers are themselves ascending. If CHECK_REGS is false, the register
13216 numbers are stored in the order they are found in the operands. */
13218 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13219 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13220 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13222 int unsorted_regs
[MAX_LDM_STM_OPS
];
13223 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13224 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13225 int order
[MAX_LDM_STM_OPS
];
13227 rtx base_reg_rtx
= NULL
;
13230 /* Write back of base register is currently only supported for Thumb 1. */
13231 int base_writeback
= TARGET_THUMB1
;
13233 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13234 easily extended if required. */
13235 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13237 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13239 /* Loop over the operands and check that the memory references are
13240 suitable (i.e. immediate offsets from the same base register). At
13241 the same time, extract the target register, and the memory
13243 for (i
= 0; i
< nops
; i
++)
13248 /* Convert a subreg of a mem into the mem itself. */
13249 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13250 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13252 gcc_assert (MEM_P (operands
[nops
+ i
]));
13254 /* Don't reorder volatile memory references; it doesn't seem worth
13255 looking for the case where the order is ok anyway. */
13256 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13259 offset
= const0_rtx
;
13261 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13262 || (GET_CODE (reg
) == SUBREG
13263 && REG_P (reg
= SUBREG_REG (reg
))))
13264 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13265 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13266 || (GET_CODE (reg
) == SUBREG
13267 && REG_P (reg
= SUBREG_REG (reg
))))
13268 && (CONST_INT_P (offset
13269 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13271 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13272 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13273 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13277 base_reg
= REGNO (reg
);
13278 base_reg_rtx
= reg
;
13279 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13282 else if (base_reg
!= (int) REGNO (reg
))
13283 /* Not addressed from the same base register. */
13286 /* If it isn't an integer register, then we can't do this. */
13287 if (unsorted_regs
[i
] < 0
13288 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13289 /* The effects are unpredictable if the base register is
13290 both updated and stored. */
13291 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13292 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13293 || unsorted_regs
[i
] > 14)
13296 unsorted_offsets
[i
] = INTVAL (offset
);
13297 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13301 /* Not a suitable memory address. */
13305 /* All the useful information has now been extracted from the
13306 operands into unsorted_regs and unsorted_offsets; additionally,
13307 order[0] has been set to the lowest offset in the list. Sort
13308 the offsets into order, verifying that they are adjacent, and
13309 check that the register numbers are ascending. */
13310 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13311 check_regs
? unsorted_regs
: NULL
))
13315 memcpy (saved_order
, order
, sizeof order
);
13321 for (i
= 0; i
< nops
; i
++)
13323 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13325 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13328 *load_offset
= unsorted_offsets
[order
[0]];
13332 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13335 if (unsorted_offsets
[order
[0]] == 0)
13336 stm_case
= 1; /* stmia */
13337 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13338 stm_case
= 2; /* stmib */
13339 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13340 stm_case
= 3; /* stmda */
13341 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13342 stm_case
= 4; /* stmdb */
13346 if (!multiple_operation_profitable_p (false, nops
, 0))
13352 /* Routines for use in generating RTL. */
13354 /* Generate a load-multiple instruction. COUNT is the number of loads in
13355 the instruction; REGS and MEMS are arrays containing the operands.
13356 BASEREG is the base register to be used in addressing the memory operands.
13357 WBACK_OFFSET is nonzero if the instruction should update the base
13361 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13362 HOST_WIDE_INT wback_offset
)
13367 if (!multiple_operation_profitable_p (false, count
, 0))
13373 for (i
= 0; i
< count
; i
++)
13374 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13376 if (wback_offset
!= 0)
13377 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13379 seq
= get_insns ();
13385 result
= gen_rtx_PARALLEL (VOIDmode
,
13386 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13387 if (wback_offset
!= 0)
13389 XVECEXP (result
, 0, 0)
13390 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13395 for (j
= 0; i
< count
; i
++, j
++)
13396 XVECEXP (result
, 0, i
)
13397 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13402 /* Generate a store-multiple instruction. COUNT is the number of stores in
13403 the instruction; REGS and MEMS are arrays containing the operands.
13404 BASEREG is the base register to be used in addressing the memory operands.
13405 WBACK_OFFSET is nonzero if the instruction should update the base
13409 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13410 HOST_WIDE_INT wback_offset
)
13415 if (GET_CODE (basereg
) == PLUS
)
13416 basereg
= XEXP (basereg
, 0);
13418 if (!multiple_operation_profitable_p (false, count
, 0))
13424 for (i
= 0; i
< count
; i
++)
13425 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13427 if (wback_offset
!= 0)
13428 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13430 seq
= get_insns ();
13436 result
= gen_rtx_PARALLEL (VOIDmode
,
13437 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13438 if (wback_offset
!= 0)
13440 XVECEXP (result
, 0, 0)
13441 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13446 for (j
= 0; i
< count
; i
++, j
++)
13447 XVECEXP (result
, 0, i
)
13448 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13453 /* Generate either a load-multiple or a store-multiple instruction. This
13454 function can be used in situations where we can start with a single MEM
13455 rtx and adjust its address upwards.
13456 COUNT is the number of operations in the instruction, not counting a
13457 possible update of the base register. REGS is an array containing the
13459 BASEREG is the base register to be used in addressing the memory operands,
13460 which are constructed from BASEMEM.
13461 WRITE_BACK specifies whether the generated instruction should include an
13462 update of the base register.
13463 OFFSETP is used to pass an offset to and from this function; this offset
13464 is not used when constructing the address (instead BASEMEM should have an
13465 appropriate offset in its address), it is used only for setting
13466 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13469 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13470 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13472 rtx mems
[MAX_LDM_STM_OPS
];
13473 HOST_WIDE_INT offset
= *offsetp
;
13476 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13478 if (GET_CODE (basereg
) == PLUS
)
13479 basereg
= XEXP (basereg
, 0);
13481 for (i
= 0; i
< count
; i
++)
13483 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13484 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13492 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13493 write_back
? 4 * count
: 0);
13495 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13496 write_back
? 4 * count
: 0);
13500 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13501 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13503 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13508 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13509 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13511 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13515 /* Called from a peephole2 expander to turn a sequence of loads into an
13516 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13517 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13518 is true if we can reorder the registers because they are used commutatively
13520 Returns true iff we could generate a new instruction. */
13523 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13525 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13526 rtx mems
[MAX_LDM_STM_OPS
];
13527 int i
, j
, base_reg
;
13529 HOST_WIDE_INT offset
;
13530 int write_back
= FALSE
;
13534 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13535 &base_reg
, &offset
, !sort_regs
);
13541 for (i
= 0; i
< nops
- 1; i
++)
13542 for (j
= i
+ 1; j
< nops
; j
++)
13543 if (regs
[i
] > regs
[j
])
13549 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13553 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13554 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13560 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13561 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13563 if (!TARGET_THUMB1
)
13565 base_reg
= regs
[0];
13566 base_reg_rtx
= newbase
;
13570 for (i
= 0; i
< nops
; i
++)
13572 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13573 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13576 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13577 write_back
? offset
+ i
* 4 : 0));
13581 /* Called from a peephole2 expander to turn a sequence of stores into an
13582 STM instruction. OPERANDS are the operands found by the peephole matcher;
13583 NOPS indicates how many separate stores we are trying to combine.
13584 Returns true iff we could generate a new instruction. */
13587 gen_stm_seq (rtx
*operands
, int nops
)
13590 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13591 rtx mems
[MAX_LDM_STM_OPS
];
13594 HOST_WIDE_INT offset
;
13595 int write_back
= FALSE
;
13598 bool base_reg_dies
;
13600 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13601 mem_order
, &base_reg
, &offset
, true);
13606 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13608 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13611 gcc_assert (base_reg_dies
);
13617 gcc_assert (base_reg_dies
);
13618 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13622 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13624 for (i
= 0; i
< nops
; i
++)
13626 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13627 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13630 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13631 write_back
? offset
+ i
* 4 : 0));
13635 /* Called from a peephole2 expander to turn a sequence of stores that are
13636 preceded by constant loads into an STM instruction. OPERANDS are the
13637 operands found by the peephole matcher; NOPS indicates how many
13638 separate stores we are trying to combine; there are 2 * NOPS
13639 instructions in the peephole.
13640 Returns true iff we could generate a new instruction. */
13643 gen_const_stm_seq (rtx
*operands
, int nops
)
13645 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13646 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13647 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13648 rtx mems
[MAX_LDM_STM_OPS
];
13651 HOST_WIDE_INT offset
;
13652 int write_back
= FALSE
;
13655 bool base_reg_dies
;
13657 HARD_REG_SET allocated
;
13659 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13660 mem_order
, &base_reg
, &offset
, false);
13665 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13667 /* If the same register is used more than once, try to find a free
13669 CLEAR_HARD_REG_SET (allocated
);
13670 for (i
= 0; i
< nops
; i
++)
13672 for (j
= i
+ 1; j
< nops
; j
++)
13673 if (regs
[i
] == regs
[j
])
13675 rtx t
= peep2_find_free_register (0, nops
* 2,
13676 TARGET_THUMB1
? "l" : "r",
13677 SImode
, &allocated
);
13681 regs
[i
] = REGNO (t
);
13685 /* Compute an ordering that maps the register numbers to an ascending
13688 for (i
= 0; i
< nops
; i
++)
13689 if (regs
[i
] < regs
[reg_order
[0]])
13692 for (i
= 1; i
< nops
; i
++)
13694 int this_order
= reg_order
[i
- 1];
13695 for (j
= 0; j
< nops
; j
++)
13696 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13697 && (this_order
== reg_order
[i
- 1]
13698 || regs
[j
] < regs
[this_order
]))
13700 reg_order
[i
] = this_order
;
13703 /* Ensure that registers that must be live after the instruction end
13704 up with the correct value. */
13705 for (i
= 0; i
< nops
; i
++)
13707 int this_order
= reg_order
[i
];
13708 if ((this_order
!= mem_order
[i
]
13709 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13710 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13714 /* Load the constants. */
13715 for (i
= 0; i
< nops
; i
++)
13717 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13718 sorted_regs
[i
] = regs
[reg_order
[i
]];
13719 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13722 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13724 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13727 gcc_assert (base_reg_dies
);
13733 gcc_assert (base_reg_dies
);
13734 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13738 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13740 for (i
= 0; i
< nops
; i
++)
13742 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13743 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13746 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13747 write_back
? offset
+ i
* 4 : 0));
13751 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13752 unaligned copies on processors which support unaligned semantics for those
13753 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13754 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13755 An interleave factor of 1 (the minimum) will perform no interleaving.
13756 Load/store multiple are used for aligned addresses where possible. */
13759 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13760 HOST_WIDE_INT length
,
13761 unsigned int interleave_factor
)
13763 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13764 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13765 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13766 HOST_WIDE_INT i
, j
;
13767 HOST_WIDE_INT remaining
= length
, words
;
13768 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13770 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13771 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13772 HOST_WIDE_INT srcoffset
, dstoffset
;
13773 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13776 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13778 /* Use hard registers if we have aligned source or destination so we can use
13779 load/store multiple with contiguous registers. */
13780 if (dst_aligned
|| src_aligned
)
13781 for (i
= 0; i
< interleave_factor
; i
++)
13782 regs
[i
] = gen_rtx_REG (SImode
, i
);
13784 for (i
= 0; i
< interleave_factor
; i
++)
13785 regs
[i
] = gen_reg_rtx (SImode
);
13787 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13788 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13790 srcoffset
= dstoffset
= 0;
13792 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13793 For copying the last bytes we want to subtract this offset again. */
13794 src_autoinc
= dst_autoinc
= 0;
13796 for (i
= 0; i
< interleave_factor
; i
++)
13799 /* Copy BLOCK_SIZE_BYTES chunks. */
13801 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13804 if (src_aligned
&& interleave_factor
> 1)
13806 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13807 TRUE
, srcbase
, &srcoffset
));
13808 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13812 for (j
= 0; j
< interleave_factor
; j
++)
13814 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13816 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13817 srcoffset
+ j
* UNITS_PER_WORD
);
13818 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13820 srcoffset
+= block_size_bytes
;
13824 if (dst_aligned
&& interleave_factor
> 1)
13826 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13827 TRUE
, dstbase
, &dstoffset
));
13828 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13832 for (j
= 0; j
< interleave_factor
; j
++)
13834 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13836 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13837 dstoffset
+ j
* UNITS_PER_WORD
);
13838 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13840 dstoffset
+= block_size_bytes
;
13843 remaining
-= block_size_bytes
;
13846 /* Copy any whole words left (note these aren't interleaved with any
13847 subsequent halfword/byte load/stores in the interests of simplicity). */
13849 words
= remaining
/ UNITS_PER_WORD
;
13851 gcc_assert (words
< interleave_factor
);
13853 if (src_aligned
&& words
> 1)
13855 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13857 src_autoinc
+= UNITS_PER_WORD
* words
;
13861 for (j
= 0; j
< words
; j
++)
13863 addr
= plus_constant (Pmode
, src
,
13864 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13865 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13866 srcoffset
+ j
* UNITS_PER_WORD
);
13868 emit_move_insn (regs
[j
], mem
);
13870 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13872 srcoffset
+= words
* UNITS_PER_WORD
;
13875 if (dst_aligned
&& words
> 1)
13877 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13879 dst_autoinc
+= words
* UNITS_PER_WORD
;
13883 for (j
= 0; j
< words
; j
++)
13885 addr
= plus_constant (Pmode
, dst
,
13886 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13887 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13888 dstoffset
+ j
* UNITS_PER_WORD
);
13890 emit_move_insn (mem
, regs
[j
]);
13892 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13894 dstoffset
+= words
* UNITS_PER_WORD
;
13897 remaining
-= words
* UNITS_PER_WORD
;
13899 gcc_assert (remaining
< 4);
13901 /* Copy a halfword if necessary. */
13903 if (remaining
>= 2)
13905 halfword_tmp
= gen_reg_rtx (SImode
);
13907 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13908 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13909 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13911 /* Either write out immediately, or delay until we've loaded the last
13912 byte, depending on interleave factor. */
13913 if (interleave_factor
== 1)
13915 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13916 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13917 emit_insn (gen_unaligned_storehi (mem
,
13918 gen_lowpart (HImode
, halfword_tmp
)));
13919 halfword_tmp
= NULL
;
13927 gcc_assert (remaining
< 2);
13929 /* Copy last byte. */
13931 if ((remaining
& 1) != 0)
13933 byte_tmp
= gen_reg_rtx (SImode
);
13935 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13936 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13937 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13939 if (interleave_factor
== 1)
13941 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13942 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13943 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13952 /* Store last halfword if we haven't done so already. */
13956 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13957 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13958 emit_insn (gen_unaligned_storehi (mem
,
13959 gen_lowpart (HImode
, halfword_tmp
)));
13963 /* Likewise for last byte. */
13967 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13968 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13969 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13973 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
13976 /* From mips_adjust_block_mem:
13978 Helper function for doing a loop-based block operation on memory
13979 reference MEM. Each iteration of the loop will operate on LENGTH
13982 Create a new base register for use within the loop and point it to
13983 the start of MEM. Create a new memory reference that uses this
13984 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13987 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
13990 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
13992 /* Although the new mem does not refer to a known location,
13993 it does keep up to LENGTH bytes of alignment. */
13994 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
13995 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
13998 /* From mips_block_move_loop:
14000 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14001 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14002 the memory regions do not overlap. */
14005 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14006 unsigned int interleave_factor
,
14007 HOST_WIDE_INT bytes_per_iter
)
14009 rtx src_reg
, dest_reg
, final_src
, test
;
14010 HOST_WIDE_INT leftover
;
14012 leftover
= length
% bytes_per_iter
;
14013 length
-= leftover
;
14015 /* Create registers and memory references for use within the loop. */
14016 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14017 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14019 /* Calculate the value that SRC_REG should have after the last iteration of
14021 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14022 0, 0, OPTAB_WIDEN
);
14024 /* Emit the start of the loop. */
14025 rtx_code_label
*label
= gen_label_rtx ();
14026 emit_label (label
);
14028 /* Emit the loop body. */
14029 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14030 interleave_factor
);
14032 /* Move on to the next block. */
14033 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14034 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14036 /* Emit the loop condition. */
14037 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14038 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14040 /* Mop up any left-over bytes. */
14042 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14045 /* Emit a block move when either the source or destination is unaligned (not
14046 aligned to a four-byte boundary). This may need further tuning depending on
14047 core type, optimize_size setting, etc. */
14050 arm_movmemqi_unaligned (rtx
*operands
)
14052 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14056 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14057 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14058 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14059 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14060 or dst_aligned though: allow more interleaving in those cases since the
14061 resulting code can be smaller. */
14062 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14063 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14066 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14067 interleave_factor
, bytes_per_iter
);
14069 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14070 interleave_factor
);
14074 /* Note that the loop created by arm_block_move_unaligned_loop may be
14075 subject to loop unrolling, which makes tuning this condition a little
14078 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14080 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14087 arm_gen_movmemqi (rtx
*operands
)
14089 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14090 HOST_WIDE_INT srcoffset
, dstoffset
;
14092 rtx src
, dst
, srcbase
, dstbase
;
14093 rtx part_bytes_reg
= NULL
;
14096 if (!CONST_INT_P (operands
[2])
14097 || !CONST_INT_P (operands
[3])
14098 || INTVAL (operands
[2]) > 64)
14101 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14102 return arm_movmemqi_unaligned (operands
);
14104 if (INTVAL (operands
[3]) & 3)
14107 dstbase
= operands
[0];
14108 srcbase
= operands
[1];
14110 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14111 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14113 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14114 out_words_to_go
= INTVAL (operands
[2]) / 4;
14115 last_bytes
= INTVAL (operands
[2]) & 3;
14116 dstoffset
= srcoffset
= 0;
14118 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14119 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14121 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14123 if (in_words_to_go
> 4)
14124 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14125 TRUE
, srcbase
, &srcoffset
));
14127 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14128 src
, FALSE
, srcbase
,
14131 if (out_words_to_go
)
14133 if (out_words_to_go
> 4)
14134 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14135 TRUE
, dstbase
, &dstoffset
));
14136 else if (out_words_to_go
!= 1)
14137 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14138 out_words_to_go
, dst
,
14141 dstbase
, &dstoffset
));
14144 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14145 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14146 if (last_bytes
!= 0)
14148 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14154 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14155 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14158 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14159 if (out_words_to_go
)
14163 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14164 sreg
= copy_to_reg (mem
);
14166 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14167 emit_move_insn (mem
, sreg
);
14170 gcc_assert (!in_words_to_go
); /* Sanity check */
14173 if (in_words_to_go
)
14175 gcc_assert (in_words_to_go
> 0);
14177 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14178 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14181 gcc_assert (!last_bytes
|| part_bytes_reg
);
14183 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14185 rtx tmp
= gen_reg_rtx (SImode
);
14187 /* The bytes we want are in the top end of the word. */
14188 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14189 GEN_INT (8 * (4 - last_bytes
))));
14190 part_bytes_reg
= tmp
;
14194 mem
= adjust_automodify_address (dstbase
, QImode
,
14195 plus_constant (Pmode
, dst
,
14197 dstoffset
+ last_bytes
- 1);
14198 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14202 tmp
= gen_reg_rtx (SImode
);
14203 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14204 part_bytes_reg
= tmp
;
14211 if (last_bytes
> 1)
14213 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14214 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14218 rtx tmp
= gen_reg_rtx (SImode
);
14219 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14220 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14221 part_bytes_reg
= tmp
;
14228 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14229 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14236 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14239 next_consecutive_mem (rtx mem
)
14241 machine_mode mode
= GET_MODE (mem
);
14242 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14243 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14245 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14248 /* Copy using LDRD/STRD instructions whenever possible.
14249 Returns true upon success. */
14251 gen_movmem_ldrd_strd (rtx
*operands
)
14253 unsigned HOST_WIDE_INT len
;
14254 HOST_WIDE_INT align
;
14255 rtx src
, dst
, base
;
14257 bool src_aligned
, dst_aligned
;
14258 bool src_volatile
, dst_volatile
;
14260 gcc_assert (CONST_INT_P (operands
[2]));
14261 gcc_assert (CONST_INT_P (operands
[3]));
14263 len
= UINTVAL (operands
[2]);
14267 /* Maximum alignment we can assume for both src and dst buffers. */
14268 align
= INTVAL (operands
[3]);
14270 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14273 /* Place src and dst addresses in registers
14274 and update the corresponding mem rtx. */
14276 dst_volatile
= MEM_VOLATILE_P (dst
);
14277 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14278 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14279 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14282 src_volatile
= MEM_VOLATILE_P (src
);
14283 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14284 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14285 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14287 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14290 if (src_volatile
|| dst_volatile
)
14293 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14294 if (!(dst_aligned
|| src_aligned
))
14295 return arm_gen_movmemqi (operands
);
14297 /* If the either src or dst is unaligned we'll be accessing it as pairs
14298 of unaligned SImode accesses. Otherwise we can generate DImode
14299 ldrd/strd instructions. */
14300 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14301 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14306 reg0
= gen_reg_rtx (DImode
);
14307 rtx low_reg
= NULL_RTX
;
14308 rtx hi_reg
= NULL_RTX
;
14310 if (!src_aligned
|| !dst_aligned
)
14312 low_reg
= gen_lowpart (SImode
, reg0
);
14313 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14316 emit_move_insn (reg0
, src
);
14319 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14320 src
= next_consecutive_mem (src
);
14321 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14325 emit_move_insn (dst
, reg0
);
14328 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14329 dst
= next_consecutive_mem (dst
);
14330 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14333 src
= next_consecutive_mem (src
);
14334 dst
= next_consecutive_mem (dst
);
14337 gcc_assert (len
< 8);
14340 /* More than a word but less than a double-word to copy. Copy a word. */
14341 reg0
= gen_reg_rtx (SImode
);
14342 src
= adjust_address (src
, SImode
, 0);
14343 dst
= adjust_address (dst
, SImode
, 0);
14345 emit_move_insn (reg0
, src
);
14347 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14350 emit_move_insn (dst
, reg0
);
14352 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14354 src
= next_consecutive_mem (src
);
14355 dst
= next_consecutive_mem (dst
);
14362 /* Copy the remaining bytes. */
14365 dst
= adjust_address (dst
, HImode
, 0);
14366 src
= adjust_address (src
, HImode
, 0);
14367 reg0
= gen_reg_rtx (SImode
);
14369 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14371 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14374 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14376 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14378 src
= next_consecutive_mem (src
);
14379 dst
= next_consecutive_mem (dst
);
14384 dst
= adjust_address (dst
, QImode
, 0);
14385 src
= adjust_address (src
, QImode
, 0);
14386 reg0
= gen_reg_rtx (QImode
);
14387 emit_move_insn (reg0
, src
);
14388 emit_move_insn (dst
, reg0
);
14392 /* Select a dominance comparison mode if possible for a test of the general
14393 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14394 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14395 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14396 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14397 In all cases OP will be either EQ or NE, but we don't need to know which
14398 here. If we are unable to support a dominance comparison we return
14399 CC mode. This will then fail to match for the RTL expressions that
14400 generate this call. */
14402 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14404 enum rtx_code cond1
, cond2
;
14407 /* Currently we will probably get the wrong result if the individual
14408 comparisons are not simple. This also ensures that it is safe to
14409 reverse a comparison if necessary. */
14410 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14412 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14416 /* The if_then_else variant of this tests the second condition if the
14417 first passes, but is true if the first fails. Reverse the first
14418 condition to get a true "inclusive-or" expression. */
14419 if (cond_or
== DOM_CC_NX_OR_Y
)
14420 cond1
= reverse_condition (cond1
);
14422 /* If the comparisons are not equal, and one doesn't dominate the other,
14423 then we can't do this. */
14425 && !comparison_dominates_p (cond1
, cond2
)
14426 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14430 std::swap (cond1
, cond2
);
14435 if (cond_or
== DOM_CC_X_AND_Y
)
14440 case EQ
: return CC_DEQmode
;
14441 case LE
: return CC_DLEmode
;
14442 case LEU
: return CC_DLEUmode
;
14443 case GE
: return CC_DGEmode
;
14444 case GEU
: return CC_DGEUmode
;
14445 default: gcc_unreachable ();
14449 if (cond_or
== DOM_CC_X_AND_Y
)
14461 gcc_unreachable ();
14465 if (cond_or
== DOM_CC_X_AND_Y
)
14477 gcc_unreachable ();
14481 if (cond_or
== DOM_CC_X_AND_Y
)
14482 return CC_DLTUmode
;
14487 return CC_DLTUmode
;
14489 return CC_DLEUmode
;
14493 gcc_unreachable ();
14497 if (cond_or
== DOM_CC_X_AND_Y
)
14498 return CC_DGTUmode
;
14503 return CC_DGTUmode
;
14505 return CC_DGEUmode
;
14509 gcc_unreachable ();
14512 /* The remaining cases only occur when both comparisons are the
14515 gcc_assert (cond1
== cond2
);
14519 gcc_assert (cond1
== cond2
);
14523 gcc_assert (cond1
== cond2
);
14527 gcc_assert (cond1
== cond2
);
14528 return CC_DLEUmode
;
14531 gcc_assert (cond1
== cond2
);
14532 return CC_DGEUmode
;
14535 gcc_unreachable ();
14540 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14542 /* All floating point compares return CCFP if it is an equality
14543 comparison, and CCFPE otherwise. */
14544 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14567 gcc_unreachable ();
14571 /* A compare with a shifted operand. Because of canonicalization, the
14572 comparison will have to be swapped when we emit the assembler. */
14573 if (GET_MODE (y
) == SImode
14574 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14575 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14576 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14577 || GET_CODE (x
) == ROTATERT
))
14580 /* This operation is performed swapped, but since we only rely on the Z
14581 flag we don't need an additional mode. */
14582 if (GET_MODE (y
) == SImode
14583 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14584 && GET_CODE (x
) == NEG
14585 && (op
== EQ
|| op
== NE
))
14588 /* This is a special case that is used by combine to allow a
14589 comparison of a shifted byte load to be split into a zero-extend
14590 followed by a comparison of the shifted integer (only valid for
14591 equalities and unsigned inequalities). */
14592 if (GET_MODE (x
) == SImode
14593 && GET_CODE (x
) == ASHIFT
14594 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14595 && GET_CODE (XEXP (x
, 0)) == SUBREG
14596 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14597 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14598 && (op
== EQ
|| op
== NE
14599 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14600 && CONST_INT_P (y
))
14603 /* A construct for a conditional compare, if the false arm contains
14604 0, then both conditions must be true, otherwise either condition
14605 must be true. Not all conditions are possible, so CCmode is
14606 returned if it can't be done. */
14607 if (GET_CODE (x
) == IF_THEN_ELSE
14608 && (XEXP (x
, 2) == const0_rtx
14609 || XEXP (x
, 2) == const1_rtx
)
14610 && COMPARISON_P (XEXP (x
, 0))
14611 && COMPARISON_P (XEXP (x
, 1)))
14612 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14613 INTVAL (XEXP (x
, 2)));
14615 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14616 if (GET_CODE (x
) == AND
14617 && (op
== EQ
|| op
== NE
)
14618 && COMPARISON_P (XEXP (x
, 0))
14619 && COMPARISON_P (XEXP (x
, 1)))
14620 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14623 if (GET_CODE (x
) == IOR
14624 && (op
== EQ
|| op
== NE
)
14625 && COMPARISON_P (XEXP (x
, 0))
14626 && COMPARISON_P (XEXP (x
, 1)))
14627 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14630 /* An operation (on Thumb) where we want to test for a single bit.
14631 This is done by shifting that bit up into the top bit of a
14632 scratch register; we can then branch on the sign bit. */
14634 && GET_MODE (x
) == SImode
14635 && (op
== EQ
|| op
== NE
)
14636 && GET_CODE (x
) == ZERO_EXTRACT
14637 && XEXP (x
, 1) == const1_rtx
)
14640 /* An operation that sets the condition codes as a side-effect, the
14641 V flag is not set correctly, so we can only use comparisons where
14642 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14644 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14645 if (GET_MODE (x
) == SImode
14647 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14648 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14649 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14650 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14651 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14652 || GET_CODE (x
) == LSHIFTRT
14653 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14654 || GET_CODE (x
) == ROTATERT
14655 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14656 return CC_NOOVmode
;
14658 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14661 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14662 && GET_CODE (x
) == PLUS
14663 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14666 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14672 /* A DImode comparison against zero can be implemented by
14673 or'ing the two halves together. */
14674 if (y
== const0_rtx
)
14677 /* We can do an equality test in three Thumb instructions. */
14687 /* DImode unsigned comparisons can be implemented by cmp +
14688 cmpeq without a scratch register. Not worth doing in
14699 /* DImode signed and unsigned comparisons can be implemented
14700 by cmp + sbcs with a scratch register, but that does not
14701 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14702 gcc_assert (op
!= EQ
&& op
!= NE
);
14706 gcc_unreachable ();
14710 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14711 return GET_MODE (x
);
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */

rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
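/* Illustrative sketch (hedged; register names arbitrary): on Thumb, a DImode
   equality test against a non-zero Y is rewritten above as (X ^ Y) == 0 so
   that CC_Zmode applies, giving roughly

	eors	r4, r0, r2
	eors	r5, r1, r3
	orrs	r4, r4, r5

   after which only the Z flag needs testing for the branch.  */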
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
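      /* Illustrative arithmetic (not from the original source): for
	 offset == 0x11fff the mask gives lo == 0xfff, the corner case
	 above trims that to lo == 0x7ff, and hi becomes 0x11800; the
	 assertion hi + lo == offset still holds, and both hi (for the
	 add below) and lo + 1 (for the byte loads) stay encodable.  */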
      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
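/* Schematically, for little-endian targets the sequence emitted above is
   (an illustrative sketch; register names are arbitrary):

	strb	outval, [base, #offset]
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]

   Big-endian targets emit the same two byte stores with the byte roles
   swapped.  */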
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
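/* Example values (illustrative): in ARM state offsets up to +/-255 are
   accepted, so offset_ok_for_ldrd_strd (252) is true while
   offset_ok_for_ldrd_strd (256) is false; in Thumb-2 state 1020 is
   accepted but 1021 is rejected for not being a multiple of 4.  */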
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the
   offsets is 4.  If preload complete then check that registers are legal.
   WBACK indicates whether address is updated.  LOAD indicates whether memory
   access is load or store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT	    address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT	    forwards;
  HOST_WIDE_INT	    backwards;
};
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
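/* For instance, MINIPOOL_FIX_SIZE (HImode) evaluates to 4 while
   MINIPOOL_FIX_SIZE (DImode) is 8, so every pool entry occupies at
   least one full word.  */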
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */

static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address,
	     align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
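/* Example (illustrative): a JUMP_INSN that is immediately followed by a
   label scores 50 - 20 - 10 = 20, making the point just after an
   unconditional branch the cheapest place to force a pool barrier.  */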
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
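/* Worked example (illustrative): for the DImode constant
   0x0000000100000001 both halves are 1, a valid immediate, so each half
   synthesizes in one insn and the total cost is 2, within the limit
   returned by arm_max_const_double_inline_cost above.  */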
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
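/* Example (illustrative): 0x00ff000000ff0000 passes, since each half is
   the valid rotated immediate 0x00ff0000; a constant such as
   0x1234567812345678 fails and must be handled by parts or via the
   literal pool.  */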
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside
	 a field's size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
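/* Worked example (illustrative, not from the original sources): for
   struct { char c; short s; } passed in r0, 'c' occupies bits 0-7 and
   's' bits 16-31, so the inter-field padding is recorded as
   padding_bits_to_clear[0] == 0x0000ff00, and bit 0 of the returned
   mask marks r0 itself as carrying live data.  */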
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)

{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
16876 /* Saves callee saved registers, clears callee saved registers and caller saved
16877 registers not used to pass arguments before a cmse_nonsecure_call. And
16878 restores the callee saved registers after. */
16881 cmse_nonsecure_call_clear_caller_saved (void)
16885 FOR_EACH_BB_FN (bb
, cfun
)
16889 FOR_BB_INSNS (bb
, insn
)
16891 uint64_t to_clear_mask
, float_mask
;
16893 rtx pat
, call
, unspec
, reg
, cleared_reg
, tmp
;
16894 unsigned int regno
, maxregno
;
16896 CUMULATIVE_ARGS args_so_far_v
;
16897 cumulative_args_t args_so_far
;
16898 tree arg_type
, fntype
;
16899 bool using_r4
, first_param
= true;
16900 function_args_iterator args_iter
;
16901 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
16902 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
[0];
16904 if (!NONDEBUG_INSN_P (insn
))
16907 if (!CALL_P (insn
))
16910 pat
= PATTERN (insn
);
16911 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
16912 call
= XVECEXP (pat
, 0, 0);
16914 /* Get the real call RTX if the insn sets a value, ie. returns. */
16915 if (GET_CODE (call
) == SET
)
16916 call
= SET_SRC (call
);
16918 /* Check if it is a cmse_nonsecure_call. */
16919 unspec
= XEXP (call
, 0);
16920 if (GET_CODE (unspec
) != UNSPEC
16921 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
16924 /* Determine the caller-saved registers we need to clear. */
16925 to_clear_mask
= (1LL << (NUM_ARG_REGS
)) - 1;
16926 maxregno
= NUM_ARG_REGS
- 1;
16927 /* Only look at the caller-saved floating point registers in case of
16928 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16929 lazy store and loads which clear both caller- and callee-saved
16931 if (TARGET_HARD_FLOAT_ABI
)
16933 float_mask
= (1LL << (D7_VFP_REGNUM
+ 1)) - 1;
16934 float_mask
&= ~((1LL << FIRST_VFP_REGNUM
) - 1);
16935 to_clear_mask
|= float_mask
;
16936 maxregno
= D7_VFP_REGNUM
;
16939 /* Make sure the register used to hold the function address is not
16941 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
16942 gcc_assert (MEM_P (address
));
16943 gcc_assert (REG_P (XEXP (address
, 0)));
16944 to_clear_mask
&= ~(1LL << REGNO (XEXP (address
, 0)));
16946 /* Set basic block of call insn so that df rescan is performed on
16947 insns inserted here. */
16948 set_block_for_insn (insn
, bb
);
16949 df_set_flags (DF_DEFER_INSN_RESCAN
);
16952 /* Make sure the scheduler doesn't schedule other insns beyond
16954 emit_insn (gen_blockage ());
16956 /* Walk through all arguments and clear registers appropriately.
16958 fntype
= TREE_TYPE (MEM_EXPR (address
));
16959 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
16961 args_so_far
= pack_cumulative_args (&args_so_far_v
);
16962 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
16965 machine_mode arg_mode
= TYPE_MODE (arg_type
);
16967 if (VOID_TYPE_P (arg_type
))
16971 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
16974 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
16976 gcc_assert (REG_P (arg_rtx
));
16978 &= ~compute_not_to_clear_mask (arg_type
, arg_rtx
,
16980 padding_bits_to_clear_ptr
);
16982 first_param
= false;
16985 /* Clear padding bits where needed. */
16986 cleared_reg
= XEXP (address
, 0);
16987 reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
16989 for (regno
= R0_REGNUM
; regno
< NUM_ARG_REGS
; regno
++)
16991 if (padding_bits_to_clear
[regno
] == 0)
16994 /* If this is a Thumb-1 target copy the address of the function
16995 we are calling from 'r4' into 'ip' such that we can use r4 to
16996 clear the unused bits in the arguments. */
16997 if (TARGET_THUMB1
&& !using_r4
)
17001 emit_move_insn (gen_rtx_REG (SImode
, IP_REGNUM
),
17005 tmp
= GEN_INT ((((~padding_bits_to_clear
[regno
]) << 16u) >> 16u));
17006 emit_move_insn (reg
, tmp
);
17007 /* Also fill the top half of the negated
17008 padding_bits_to_clear. */
17009 if (((~padding_bits_to_clear
[regno
]) >> 16) > 0)
17011 tmp
= GEN_INT ((~padding_bits_to_clear
[regno
]) >> 16);
17012 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg
,
17018 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, regno
),
17019 gen_rtx_REG (SImode
, regno
),
17024 emit_move_insn (cleared_reg
,
17025 gen_rtx_REG (SImode
, IP_REGNUM
));
17027 /* We use right shift and left shift to clear the LSB of the address
17028 we jump to instead of using bic, to avoid having to use an extra
17029 register on Thumb-1. */
17030 tmp
= gen_rtx_LSHIFTRT (SImode
, cleared_reg
, const1_rtx
);
17031 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17032 tmp
= gen_rtx_ASHIFT (SImode
, cleared_reg
, const1_rtx
);
17033 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17035 /* Clearing all registers that leak before doing a non-secure
17037 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17039 if (!(to_clear_mask
& (1LL << regno
)))
17042 /* If regno is an even vfp register and its successor is also to
17043 be cleared, use vmov. */
17044 if (IS_VFP_REGNUM (regno
))
17046 if (TARGET_VFP_DOUBLE
17047 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17048 && to_clear_mask
& (1LL << (regno
+ 1)))
17049 emit_move_insn (gen_rtx_REG (DFmode
, regno
++),
17050 CONST0_RTX (DFmode
));
17052 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17053 CONST0_RTX (SFmode
));
17056 emit_move_insn (gen_rtx_REG (SImode
, regno
), cleared_reg
);
17059 seq
= get_insns ();
17061 emit_insn_before (seq
, insn
);
17067 /* Rewrite move insn into subtract of 0 if the condition codes will
17068 be useful in next conditional jump insn. */
17071 thumb1_reorg (void)
17075 FOR_EACH_BB_FN (bb
, cfun
)
17078 rtx cmp
, op0
, op1
, set
= NULL
;
17079 rtx_insn
*prev
, *insn
= BB_END (bb
);
17080 bool insn_clobbered
= false;
17082 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17083 insn
= PREV_INSN (insn
);
17085 /* Find the last cbranchsi4_insn in basic block BB. */
17086 if (insn
== BB_HEAD (bb
)
17087 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17090 /* Get the register with which we are comparing. */
17091 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17092 op0
= XEXP (cmp
, 0);
17093 op1
= XEXP (cmp
, 1);
17095 /* Check that comparison is against ZERO. */
17096 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17099 /* Find the first flag setting insn before INSN in basic block BB. */
17100 gcc_assert (insn
!= BB_HEAD (bb
));
17101 for (prev
= PREV_INSN (insn
);
17103 && prev
!= BB_HEAD (bb
)
17105 || DEBUG_INSN_P (prev
)
17106 || ((set
= single_set (prev
)) != NULL
17107 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17108 prev
= PREV_INSN (prev
))
17110 if (reg_set_p (op0
, prev
))
17111 insn_clobbered
= true;
17114 /* Skip if op0 is clobbered by insn other than prev. */
17115 if (insn_clobbered
)
17121 dest
= SET_DEST (set
);
17122 src
= SET_SRC (set
);
17123 if (!low_register_operand (dest
, SImode
)
17124 || !low_register_operand (src
, SImode
))
17127 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17128 in INSN. Both src and dest of the move insn are checked. */
17129 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17131 dest
= copy_rtx (dest
);
17132 src
= copy_rtx (src
);
17133 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17134 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17135 INSN_CODE (prev
) = -1;
17136 /* Set test register in INSN to dest. */
17137 XEXP (cmp
, 0) = copy_rtx (dest
);
17138 INSN_CODE (insn
) = -1;
17143 /* Convert instructions to their cc-clobbering variant if possible, since
17144 that allows us to use smaller encodings. */
17147 thumb2_reorg (void)
17152 INIT_REG_SET (&live
);
17154 /* We are freeing block_for_insn in the toplev to keep compatibility
17155 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17156 compute_bb_for_insn ();
17159 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17161 FOR_EACH_BB_FN (bb
, cfun
)
17163 if ((current_tune
->disparage_flag_setting_t16_encodings
17164 == tune_params::DISPARAGE_FLAGS_ALL
)
17165 && optimize_bb_for_speed_p (bb
))
17169 Convert_Action action
= SKIP
;
17170 Convert_Action action_for_partial_flag_setting
17171 = ((current_tune
->disparage_flag_setting_t16_encodings
17172 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17173 && optimize_bb_for_speed_p (bb
))
17176 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17177 df_simulate_initialize_backwards (bb
, &live
);
17178 FOR_BB_INSNS_REVERSE (bb
, insn
)
17180 if (NONJUMP_INSN_P (insn
)
17181 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17182 && GET_CODE (PATTERN (insn
)) == SET
)
17185 rtx pat
= PATTERN (insn
);
17186 rtx dst
= XEXP (pat
, 0);
17187 rtx src
= XEXP (pat
, 1);
17188 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17190 if (UNARY_P (src
) || BINARY_P (src
))
17191 op0
= XEXP (src
, 0);
17193 if (BINARY_P (src
))
17194 op1
= XEXP (src
, 1);
17196 if (low_register_operand (dst
, SImode
))
17198 switch (GET_CODE (src
))
17201 /* Adding two registers and storing the result
17202 in the first source is already a 16-bit
17204 if (rtx_equal_p (dst
, op0
)
17205 && register_operand (op1
, SImode
))
17208 if (low_register_operand (op0
, SImode
))
17210 /* ADDS <Rd>,<Rn>,<Rm> */
17211 if (low_register_operand (op1
, SImode
))
17213 /* ADDS <Rdn>,#<imm8> */
17214 /* SUBS <Rdn>,#<imm8> */
17215 else if (rtx_equal_p (dst
, op0
)
17216 && CONST_INT_P (op1
)
17217 && IN_RANGE (INTVAL (op1
), -255, 255))
17219 /* ADDS <Rd>,<Rn>,#<imm3> */
17220 /* SUBS <Rd>,<Rn>,#<imm3> */
17221 else if (CONST_INT_P (op1
)
17222 && IN_RANGE (INTVAL (op1
), -7, 7))
17225 /* ADCS <Rd>, <Rn> */
17226 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17227 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17228 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17230 && COMPARISON_P (op1
)
17231 && cc_register (XEXP (op1
, 0), VOIDmode
)
17232 && maybe_get_arm_condition_code (op1
) == ARM_CS
17233 && XEXP (op1
, 1) == const0_rtx
)
17238 /* RSBS <Rd>,<Rn>,#0
17239 Not handled here: see NEG below. */
17240 /* SUBS <Rd>,<Rn>,#<imm3>
17242 Not handled here: see PLUS above. */
17243 /* SUBS <Rd>,<Rn>,<Rm> */
17244 if (low_register_operand (op0
, SImode
)
17245 && low_register_operand (op1
, SImode
))
17250 /* MULS <Rdm>,<Rn>,<Rdm>
17251 As an exception to the rule, this is only used
17252 when optimizing for size since MULS is slow on all
17253 known implementations. We do not even want to use
17254 MULS in cold code, if optimizing for speed, so we
17255 test the global flag here. */
17256 if (!optimize_size
)
17258 /* Fall through. */
17262 /* ANDS <Rdn>,<Rm> */
17263 if (rtx_equal_p (dst
, op0
)
17264 && low_register_operand (op1
, SImode
))
17265 action
= action_for_partial_flag_setting
;
17266 else if (rtx_equal_p (dst
, op1
)
17267 && low_register_operand (op0
, SImode
))
17268 action
= action_for_partial_flag_setting
== SKIP
17269 ? SKIP
: SWAP_CONV
;
17275 /* ASRS <Rdn>,<Rm> */
17276 /* LSRS <Rdn>,<Rm> */
17277 /* LSLS <Rdn>,<Rm> */
17278 if (rtx_equal_p (dst
, op0
)
17279 && low_register_operand (op1
, SImode
))
17280 action
= action_for_partial_flag_setting
;
17281 /* ASRS <Rd>,<Rm>,#<imm5> */
17282 /* LSRS <Rd>,<Rm>,#<imm5> */
17283 /* LSLS <Rd>,<Rm>,#<imm5> */
17284 else if (low_register_operand (op0
, SImode
)
17285 && CONST_INT_P (op1
)
17286 && IN_RANGE (INTVAL (op1
), 0, 31))
17287 action
= action_for_partial_flag_setting
;
17291 /* RORS <Rdn>,<Rm> */
17292 if (rtx_equal_p (dst
, op0
)
17293 && low_register_operand (op1
, SImode
))
17294 action
= action_for_partial_flag_setting
;
17298 /* MVNS <Rd>,<Rm> */
17299 if (low_register_operand (op0
, SImode
))
17300 action
= action_for_partial_flag_setting
;
17304 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17305 if (low_register_operand (op0
, SImode
))
17310 /* MOVS <Rd>,#<imm8> */
17311 if (CONST_INT_P (src
)
17312 && IN_RANGE (INTVAL (src
), 0, 255))
17313 action
= action_for_partial_flag_setting
;
17317 /* MOVS and MOV<c> with registers have different
17318 encodings, so are not relevant here. */
17326 if (action
!= SKIP
)
17328 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17329 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17332 if (action
== SWAP_CONV
)
17334 src
= copy_rtx (src
);
17335 XEXP (src
, 0) = op1
;
17336 XEXP (src
, 1) = op0
;
17337 pat
= gen_rtx_SET (dst
, src
);
17338 vec
= gen_rtvec (2, pat
, clobber
);
17340 else /* action == CONV */
17341 vec
= gen_rtvec (2, pat
, clobber
);
17343 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17344 INSN_CODE (insn
) = -1;
17348 if (NONDEBUG_INSN_P (insn
))
17349 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17353 CLEAR_REG_SET (&live
);
17356 /* Gcc puts the pool in the wrong place for ARM, since we can only
17357 load addresses a limited distance around the pc. We do some
17358 special munging to move the constant pool values to the correct
17359 point in the code. */
17364 HOST_WIDE_INT address
= 0;
17368 cmse_nonsecure_call_clear_caller_saved ();
17371 else if (TARGET_THUMB2
)
17374 /* Ensure all insns that must be split have been split at this point.
17375 Otherwise, the pool placement code below may compute incorrect
17376 insn lengths. Note that when optimizing, all insns have already
17377 been split at this point. */
17379 split_all_insns_noflow ();
17381 minipool_fix_head
= minipool_fix_tail
= NULL
;
17383 /* The first insn must always be a note, or the code below won't
17384 scan it properly. */
17385 insn
= get_insns ();
17386 gcc_assert (NOTE_P (insn
));
17389 /* Scan all the insns and record the operands that will need fixing. */
17390 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17392 if (BARRIER_P (insn
))
17393 push_minipool_barrier (insn
, address
);
17394 else if (INSN_P (insn
))
17396 rtx_jump_table_data
*table
;
17398 note_invalid_constants (insn
, address
, true);
17399 address
+= get_attr_length (insn
);
17401 /* If the insn is a vector jump, add the size of the table
17402 and skip the table. */
17403 if (tablejump_p (insn
, NULL
, &table
))
17405 address
+= get_jump_table_size (table
);
17409 else if (LABEL_P (insn
))
17410 /* Add the worst-case padding due to alignment. We don't add
17411 the _current_ padding because the minipool insertions
17412 themselves might change it. */
17413 address
+= get_label_padding (insn
);
17416 fix
= minipool_fix_head
;
17418 /* Now scan the fixups and perform the required changes. */
17423 Mfix
* last_added_fix
;
17424 Mfix
* last_barrier
= NULL
;
17427 /* Skip any further barriers before the next fix. */
17428 while (fix
&& BARRIER_P (fix
->insn
))
17431 /* No more fixes. */
17435 last_added_fix
= NULL
;
17437 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17439 if (BARRIER_P (ftmp
->insn
))
17441 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17444 last_barrier
= ftmp
;
17446 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17449 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17452 /* If we found a barrier, drop back to that; any fixes that we
17453 could have reached but come after the barrier will now go in
17454 the next mini-pool. */
17455 if (last_barrier
!= NULL
)
17457 /* Reduce the refcount for those fixes that won't go into this
17459 for (fdel
= last_barrier
->next
;
17460 fdel
&& fdel
!= ftmp
;
17463 fdel
->minipool
->refcount
--;
17464 fdel
->minipool
= NULL
;
17467 ftmp
= last_barrier
;
17471 /* ftmp is first fix that we can't fit into this pool and
17472 there no natural barriers that we could use. Insert a
17473 new barrier in the code somewhere between the previous
17474 fix and this one, and arrange to jump around it. */
17475 HOST_WIDE_INT max_address
;
17477 /* The last item on the list of fixes must be a barrier, so
17478 we can never run off the end of the list of fixes without
17479 last_barrier being set. */
17482 max_address
= minipool_vector_head
->max_address
;
17483 /* Check that there isn't another fix that is in range that
17484 we couldn't fit into this pool because the pool was
17485 already too large: we need to put the pool before such an
17486 instruction. The pool itself may come just after the
17487 fix because create_fix_barrier also allows space for a
17488 jump instruction. */
17489 if (ftmp
->address
< max_address
)
17490 max_address
= ftmp
->address
+ 1;
17492 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17495 assign_minipool_offsets (last_barrier
);
17499 if (!BARRIER_P (ftmp
->insn
)
17500 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17507 /* Scan over the fixes we have identified for this pool, fixing them
17508 up and adding the constants to the pool itself. */
17509 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17510 this_fix
= this_fix
->next
)
17511 if (!BARRIER_P (this_fix
->insn
))
17514 = plus_constant (Pmode
,
17515 gen_rtx_LABEL_REF (VOIDmode
,
17516 minipool_vector_label
),
17517 this_fix
->minipool
->offset
);
17518 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17521 dump_minipool (last_barrier
->insn
);
17525 /* From now on we must synthesize any constants that we can't handle
17526 directly. This can happen if the RTL gets split during final
17527 instruction generation. */
17528 cfun
->machine
->after_arm_reorg
= 1;
17530 /* Free the minipool memory. */
17531 obstack_free (&minipool_obstack
, minipool_startobj
);
17534 /* Routines to output assembly language. */
17536 /* Return string representation of passed in real value. */
17537 static const char *
17538 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17540 if (!fp_consts_inited
)
17543 gcc_assert (real_equal (r
, &value_fp0
));
17547 /* OPERANDS[0] is the entire list of insns that constitute pop,
17548 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17549 is in the list, UPDATE is true iff the list contains explicit
17550 update of base register. */
17552 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17558 const char *conditional
;
17559 int num_saves
= XVECLEN (operands
[0], 0);
17560 unsigned int regno
;
17561 unsigned int regno_base
= REGNO (operands
[1]);
17562 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17565 offset
+= update
? 1 : 0;
17566 offset
+= return_pc
? 1 : 0;
17568 /* Is the base register in the list? */
17569 for (i
= offset
; i
< num_saves
; i
++)
17571 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17572 /* If SP is in the list, then the base register must be SP. */
17573 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17574 /* If base register is in the list, there must be no explicit update. */
17575 if (regno
== regno_base
)
17576 gcc_assert (!update
);
17579 conditional
= reverse
? "%?%D0" : "%?%d0";
17580 /* Can't use POP if returning from an interrupt. */
17581 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17582 sprintf (pattern
, "pop%s\t{", conditional
);
17585 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17586 It's just a convention, their semantics are identical. */
17587 if (regno_base
== SP_REGNUM
)
17588 sprintf (pattern
, "ldmfd%s\t", conditional
);
17590 sprintf (pattern
, "ldmia%s\t", conditional
);
17592 sprintf (pattern
, "ldm%s\t", conditional
);
17594 strcat (pattern
, reg_names
[regno_base
]);
17596 strcat (pattern
, "!, {");
17598 strcat (pattern
, ", {");
17601 /* Output the first destination register. */
17603 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17605 /* Output the rest of the destination registers. */
17606 for (i
= offset
+ 1; i
< num_saves
; i
++)
17608 strcat (pattern
, ", ");
17610 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17613 strcat (pattern
, "}");
17615 if (interrupt_p
&& return_pc
)
17616 strcat (pattern
, "^");
17618 output_asm_insn (pattern
, &cond
);
17622 /* Output the assembly for a store multiple. */
17625 vfp_output_vstmd (rtx
* operands
)
17631 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17632 ? XEXP (operands
[0], 0)
17633 : XEXP (XEXP (operands
[0], 0), 0);
17634 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17637 strcpy (pattern
, "vpush%?.64\t{%P1");
17639 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17641 p
= strlen (pattern
);
17643 gcc_assert (REG_P (operands
[1]));
17645 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17646 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17648 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17650 strcpy (&pattern
[p
], "}");
17652 output_asm_insn (pattern
, operands
);
17657 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17658 number of bytes pushed. */
17661 vfp_emit_fstmd (int base_reg
, int count
)
17668 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17669 register pairs are stored by a store multiple insn. We avoid this
17670 by pushing an extra pair. */
17671 if (count
== 2 && !arm_arch6
)
17673 if (base_reg
== LAST_VFP_REGNUM
- 3)
17678 /* FSTMD may not store more than 16 doubleword registers at once. Split
17679 larger stores into multiple parts (up to a maximum of two, in
17684 /* NOTE: base_reg is an internal register number, so each D register
17686 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17687 saved
+= vfp_emit_fstmd (base_reg
, 16);
17691 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17692 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17694 reg
= gen_rtx_REG (DFmode
, base_reg
);
17697 XVECEXP (par
, 0, 0)
17698 = gen_rtx_SET (gen_frame_mem
17700 gen_rtx_PRE_MODIFY (Pmode
,
17703 (Pmode
, stack_pointer_rtx
,
17706 gen_rtx_UNSPEC (BLKmode
,
17707 gen_rtvec (1, reg
),
17708 UNSPEC_PUSH_MULT
));
17710 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17711 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17712 RTX_FRAME_RELATED_P (tmp
) = 1;
17713 XVECEXP (dwarf
, 0, 0) = tmp
;
17715 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17716 RTX_FRAME_RELATED_P (tmp
) = 1;
17717 XVECEXP (dwarf
, 0, 1) = tmp
;
17719 for (i
= 1; i
< count
; i
++)
17721 reg
= gen_rtx_REG (DFmode
, base_reg
);
17723 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17725 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17726 plus_constant (Pmode
,
17730 RTX_FRAME_RELATED_P (tmp
) = 1;
17731 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17734 par
= emit_insn (par
);
17735 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17736 RTX_FRAME_RELATED_P (par
) = 1;
17741 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17742 has the cmse_nonsecure_call attribute and returns false otherwise. */
17745 detect_cmse_nonsecure_call (tree addr
)
17750 tree fntype
= TREE_TYPE (addr
);
17751 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17752 TYPE_ATTRIBUTES (fntype
)))
17758 /* Emit a call instruction with pattern PAT. ADDR is the address of
17759 the call target. */
17762 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17766 insn
= emit_call_insn (pat
);
17768 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17769 If the call might use such an entry, add a use of the PIC register
17770 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17771 if (TARGET_VXWORKS_RTP
17774 && GET_CODE (addr
) == SYMBOL_REF
17775 && (SYMBOL_REF_DECL (addr
)
17776 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17777 : !SYMBOL_REF_LOCAL_P (addr
)))
17779 require_pic_register ();
17780 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17783 if (TARGET_AAPCS_BASED
)
17785 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17786 linker. We need to add an IP clobber to allow setting
17787 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17788 is not needed since it's a fixed register. */
17789 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17790 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17794 /* Output a 'call' insn. */
17796 output_call (rtx
*operands
)
17798 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17800 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17801 if (REGNO (operands
[0]) == LR_REGNUM
)
17803 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17804 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17807 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17809 if (TARGET_INTERWORK
|| arm_arch4t
)
17810 output_asm_insn ("bx%?\t%0", operands
);
17812 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17817 /* Output a move from arm registers to arm registers of a long double
17818 OPERANDS[0] is the destination.
17819 OPERANDS[1] is the source. */
17821 output_mov_long_double_arm_from_arm (rtx
*operands
)
17823 /* We have to be careful here because the two might overlap. */
17824 int dest_start
= REGNO (operands
[0]);
17825 int src_start
= REGNO (operands
[1]);
17829 if (dest_start
< src_start
)
17831 for (i
= 0; i
< 3; i
++)
17833 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17834 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17835 output_asm_insn ("mov%?\t%0, %1", ops
);
17840 for (i
= 2; i
>= 0; i
--)
17842 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17843 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17844 output_asm_insn ("mov%?\t%0, %1", ops
);
17852 arm_emit_movpair (rtx dest
, rtx src
)
17854 /* If the src is an immediate, simplify it. */
17855 if (CONST_INT_P (src
))
17857 HOST_WIDE_INT val
= INTVAL (src
);
17858 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17859 if ((val
>> 16) & 0x0000ffff)
17861 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17863 GEN_INT ((val
>> 16) & 0x0000ffff));
17864 rtx_insn
*insn
= get_last_insn ();
17865 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17869 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17870 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17871 rtx_insn
*insn
= get_last_insn ();
17872 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17875 /* Output a move between double words. It must be REG<-MEM
17878 output_move_double (rtx
*operands
, bool emit
, int *count
)
17880 enum rtx_code code0
= GET_CODE (operands
[0]);
17881 enum rtx_code code1
= GET_CODE (operands
[1]);
17886 /* The only case when this might happen is when
17887 you are looking at the length of a DImode instruction
17888 that has an invalid constant in it. */
17889 if (code0
== REG
&& code1
!= MEM
)
17891 gcc_assert (!emit
);
17898 unsigned int reg0
= REGNO (operands
[0]);
17900 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17902 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17904 switch (GET_CODE (XEXP (operands
[1], 0)))
17911 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17912 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17914 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17919 gcc_assert (TARGET_LDRD
);
17921 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17928 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17930 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
17938 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
17940 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
17945 gcc_assert (TARGET_LDRD
);
17947 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
17952 /* Autoicrement addressing modes should never have overlapping
17953 base and destination registers, and overlapping index registers
17954 are already prohibited, so this doesn't need to worry about
17956 otherops
[0] = operands
[0];
17957 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17958 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17960 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17962 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17964 /* Registers overlap so split out the increment. */
17967 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17968 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
17975 /* Use a single insn if we can.
17976 FIXME: IWMMXT allows offsets larger than ldrd can
17977 handle, fix these up with a pair of ldr. */
17979 || !CONST_INT_P (otherops
[2])
17980 || (INTVAL (otherops
[2]) > -256
17981 && INTVAL (otherops
[2]) < 256))
17984 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
17990 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17991 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18001 /* Use a single insn if we can.
18002 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18003 fix these up with a pair of ldr. */
18005 || !CONST_INT_P (otherops
[2])
18006 || (INTVAL (otherops
[2]) > -256
18007 && INTVAL (otherops
[2]) < 256))
18010 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18016 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18017 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18027 /* We might be able to use ldrd %0, %1 here. However the range is
18028 different to ldr/adr, and it is broken on some ARMv7-M
18029 implementations. */
18030 /* Use the second register of the pair to avoid problematic
18032 otherops
[1] = operands
[1];
18034 output_asm_insn ("adr%?\t%0, %1", otherops
);
18035 operands
[1] = otherops
[0];
18039 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18041 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18048 /* ??? This needs checking for thumb2. */
18050 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18051 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18053 otherops
[0] = operands
[0];
18054 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18055 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18057 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18059 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18061 switch ((int) INTVAL (otherops
[2]))
18065 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18071 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18077 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18081 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18082 operands
[1] = otherops
[0];
18084 && (REG_P (otherops
[2])
18086 || (CONST_INT_P (otherops
[2])
18087 && INTVAL (otherops
[2]) > -256
18088 && INTVAL (otherops
[2]) < 256)))
18090 if (reg_overlap_mentioned_p (operands
[0],
18093 /* Swap base and index registers over to
18094 avoid a conflict. */
18095 std::swap (otherops
[1], otherops
[2]);
18097 /* If both registers conflict, it will usually
18098 have been fixed by a splitter. */
18099 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18100 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18104 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18105 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18112 otherops
[0] = operands
[0];
18114 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18119 if (CONST_INT_P (otherops
[2]))
18123 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18124 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18126 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18132 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18138 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18145 return "ldrd%?\t%0, [%1]";
18147 return "ldmia%?\t%1, %M0";
18151 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18152 /* Take care of overlapping base/data reg. */
18153 if (reg_mentioned_p (operands
[0], operands
[1]))
18157 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18158 output_asm_insn ("ldr%?\t%0, %1", operands
);
18168 output_asm_insn ("ldr%?\t%0, %1", operands
);
18169 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18179 /* Constraints should ensure this. */
18180 gcc_assert (code0
== MEM
&& code1
== REG
);
18181 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18182 || (TARGET_ARM
&& TARGET_LDRD
));
18184 switch (GET_CODE (XEXP (operands
[0], 0)))
18190 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18192 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18197 gcc_assert (TARGET_LDRD
);
18199 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18206 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18208 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18216 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18218 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18223 gcc_assert (TARGET_LDRD
);
18225 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18230 otherops
[0] = operands
[1];
18231 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18232 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18234 /* IWMMXT allows offsets larger than ldrd can handle,
18235 fix these up with a pair of ldr. */
18237 && CONST_INT_P (otherops
[2])
18238 && (INTVAL(otherops
[2]) <= -256
18239 || INTVAL(otherops
[2]) >= 256))
18241 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18245 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18246 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18255 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18256 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18262 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18265 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18270 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18275 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18276 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18278 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18282 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18289 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18296 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18301 && (REG_P (otherops
[2])
18303 || (CONST_INT_P (otherops
[2])
18304 && INTVAL (otherops
[2]) > -256
18305 && INTVAL (otherops
[2]) < 256)))
18307 otherops
[0] = operands
[1];
18308 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18310 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18316 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18317 otherops
[1] = operands
[1];
18320 output_asm_insn ("str%?\t%1, %0", operands
);
18321 output_asm_insn ("str%?\t%H1, %0", otherops
);
18331 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18332 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18335 output_move_quad (rtx
*operands
)
18337 if (REG_P (operands
[0]))
18339 /* Load, or reg->reg move. */
18341 if (MEM_P (operands
[1]))
18343 switch (GET_CODE (XEXP (operands
[1], 0)))
18346 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18351 output_asm_insn ("adr%?\t%0, %1", operands
);
18352 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18356 gcc_unreachable ();
18364 gcc_assert (REG_P (operands
[1]));
18366 dest
= REGNO (operands
[0]);
18367 src
= REGNO (operands
[1]);
18369 /* This seems pretty dumb, but hopefully GCC won't try to do it
18372 for (i
= 0; i
< 4; i
++)
18374 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18375 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18376 output_asm_insn ("mov%?\t%0, %1", ops
);
18379 for (i
= 3; i
>= 0; i
--)
18381 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18382 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18383 output_asm_insn ("mov%?\t%0, %1", ops
);
18389 gcc_assert (MEM_P (operands
[0]));
18390 gcc_assert (REG_P (operands
[1]));
18391 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18393 switch (GET_CODE (XEXP (operands
[0], 0)))
18396 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18400 gcc_unreachable ();
18407 /* Output a VFP load or store instruction. */
18410 output_move_vfp (rtx
*operands
)
18412 rtx reg
, mem
, addr
, ops
[2];
18413 int load
= REG_P (operands
[0]);
18414 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18415 int sp
= (!TARGET_VFP_FP16INST
18416 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18417 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18422 reg
= operands
[!load
];
18423 mem
= operands
[load
];
18425 mode
= GET_MODE (reg
);
18427 gcc_assert (REG_P (reg
));
18428 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18429 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18435 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18436 gcc_assert (MEM_P (mem
));
18438 addr
= XEXP (mem
, 0);
18440 switch (GET_CODE (addr
))
18443 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18444 ops
[0] = XEXP (addr
, 0);
18449 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18450 ops
[0] = XEXP (addr
, 0);
18455 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18461 sprintf (buff
, templ
,
18462 load
? "ld" : "st",
18463 dp
? "64" : sp
? "32" : "16",
18465 integer_p
? "\t%@ int" : "");
18466 output_asm_insn (buff
, ops
);
18471 /* Output a Neon double-word or quad-word load or store, or a load
18472 or store for larger structure modes.
18474 WARNING: The ordering of elements is weird in big-endian mode,
18475 because the EABI requires that vectors stored in memory appear
18476 as though they were stored by a VSTM, as required by the EABI.
18477 GCC RTL defines element ordering based on in-memory order.
18478 This can be different from the architectural ordering of elements
18479 within a NEON register. The intrinsics defined in arm_neon.h use the
18480 NEON register element ordering, not the GCC RTL element ordering.
18482 For example, the in-memory ordering of a big-endian a quadword
18483 vector with 16-bit elements when stored from register pair {d0,d1}
18484 will be (lowest address first, d0[N] is NEON register element N):
18486 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18488 When necessary, quadword registers (dN, dN+1) are moved to ARM
18489 registers from rN in the order:
18491 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18493 So that STM/LDM can be used on vectors in ARM registers, and the
18494 same memory layout will result as if VSTM/VLDM were used.
18496 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18497 possible, which allows use of appropriate alignment tags.
18498 Note that the choice of "64" is independent of the actual vector
18499 element size; this size simply ensures that the behavior is
18500 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18502 Due to limitations of those instructions, use of VST1.64/VLD1.64
18503 is not possible if:
18504 - the address contains PRE_DEC, or
18505 - the mode refers to more than 4 double-word registers
18507 In those cases, it would be possible to replace VSTM/VLDM by a
18508 sequence of instructions; this is not currently implemented since
18509 this is not certain to actually improve performance. */
18512 output_move_neon (rtx
*operands
)
18514 rtx reg
, mem
, addr
, ops
[2];
18515 int regno
, nregs
, load
= REG_P (operands
[0]);
18520 reg
= operands
[!load
];
18521 mem
= operands
[load
];
18523 mode
= GET_MODE (reg
);
18525 gcc_assert (REG_P (reg
));
18526 regno
= REGNO (reg
);
18527 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18528 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18529 || NEON_REGNO_OK_FOR_QUAD (regno
));
18530 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18531 || VALID_NEON_QREG_MODE (mode
)
18532 || VALID_NEON_STRUCT_MODE (mode
));
18533 gcc_assert (MEM_P (mem
));
18535 addr
= XEXP (mem
, 0);
18537 /* Strip off const from addresses like (const (plus (...))). */
18538 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18539 addr
= XEXP (addr
, 0);
18541 switch (GET_CODE (addr
))
18544 /* We have to use vldm / vstm for too-large modes. */
18547 templ
= "v%smia%%?\t%%0!, %%h1";
18548 ops
[0] = XEXP (addr
, 0);
18552 templ
= "v%s1.64\t%%h1, %%A0";
18559 /* We have to use vldm / vstm in this case, since there is no
18560 pre-decrement form of the vld1 / vst1 instructions. */
18561 templ
= "v%smdb%%?\t%%0!, %%h1";
18562 ops
[0] = XEXP (addr
, 0);
18567 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18568 gcc_unreachable ();
18571 /* We have to use vldm / vstm for too-large modes. */
18575 templ
= "v%smia%%?\t%%m0, %%h1";
18577 templ
= "v%s1.64\t%%h1, %%A0";
18583 /* Fall through. */
18589 for (i
= 0; i
< nregs
; i
++)
18591 /* We're only using DImode here because it's a convenient size. */
18592 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18593 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18594 if (reg_overlap_mentioned_p (ops
[0], mem
))
18596 gcc_assert (overlap
== -1);
18601 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18602 output_asm_insn (buff
, ops
);
18607 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18608 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18609 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18610 output_asm_insn (buff
, ops
);
18617 gcc_unreachable ();
18620 sprintf (buff
, templ
, load
? "ld" : "st");
18621 output_asm_insn (buff
, ops
);
18626 /* Compute and return the length of neon_mov<mode>, where <mode> is
18627 one of VSTRUCT modes: EI, OI, CI or XI. */
18629 arm_attr_length_move_neon (rtx_insn
*insn
)
18631 rtx reg
, mem
, addr
;
18635 extract_insn_cached (insn
);
18637 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18639 mode
= GET_MODE (recog_data
.operand
[0]);
18650 gcc_unreachable ();
18654 load
= REG_P (recog_data
.operand
[0]);
18655 reg
= recog_data
.operand
[!load
];
18656 mem
= recog_data
.operand
[load
];
18658 gcc_assert (MEM_P (mem
));
18660 mode
= GET_MODE (reg
);
18661 addr
= XEXP (mem
, 0);
18663 /* Strip off const from addresses like (const (plus (...))). */
18664 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18665 addr
= XEXP (addr
, 0);
18667 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18669 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18676 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18680 arm_address_offset_is_imm (rtx_insn
*insn
)
18684 extract_insn_cached (insn
);
18686 if (REG_P (recog_data
.operand
[0]))
18689 mem
= recog_data
.operand
[0];
18691 gcc_assert (MEM_P (mem
));
18693 addr
= XEXP (mem
, 0);
18696 || (GET_CODE (addr
) == PLUS
18697 && REG_P (XEXP (addr
, 0))
18698 && CONST_INT_P (XEXP (addr
, 1))))
18704 /* Output an ADD r, s, #n where n may be too big for one instruction.
18705 If adding zero to one register, output nothing. */
18707 output_add_immediate (rtx
*operands
)
18709 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18711 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18714 output_multi_immediate (operands
,
18715 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18718 output_multi_immediate (operands
,
18719 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18726 /* Output a multiple immediate operation.
18727 OPERANDS is the vector of operands referred to in the output patterns.
18728 INSTR1 is the output pattern to use for the first constant.
18729 INSTR2 is the output pattern to use for subsequent constants.
18730 IMMED_OP is the index of the constant slot in OPERANDS.
18731 N is the constant value. */
18732 static const char *
18733 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18734 int immed_op
, HOST_WIDE_INT n
)
18736 #if HOST_BITS_PER_WIDE_INT > 32
18742 /* Quick and easy output. */
18743 operands
[immed_op
] = const0_rtx
;
18744 output_asm_insn (instr1
, operands
);
18749 const char * instr
= instr1
;
18751 /* Note that n is never zero here (which would give no output). */
18752 for (i
= 0; i
< 32; i
+= 2)
18756 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18757 output_asm_insn (instr
, operands
);
18767 /* Return the name of a shifter operation. */
18768 static const char *
18769 arm_shift_nmem(enum rtx_code code
)
18774 return ARM_LSL_NAME
;
18790 /* Return the appropriate ARM instruction for the operation code.
18791 The returned result should not be overwritten. OP is the rtx of the
18792 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18795 arithmetic_instr (rtx op
, int shift_first_arg
)
18797 switch (GET_CODE (op
))
18803 return shift_first_arg
? "rsb" : "sub";
18818 return arm_shift_nmem(GET_CODE(op
));
18821 gcc_unreachable ();
18825 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18826 for the operation code. The returned result should not be overwritten.
18827 OP is the rtx code of the shift.
18828 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18830 static const char *
18831 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18834 enum rtx_code code
= GET_CODE (op
);
18839 if (!CONST_INT_P (XEXP (op
, 1)))
18841 output_operand_lossage ("invalid shift operand");
18846 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18854 mnem
= arm_shift_nmem(code
);
18855 if (CONST_INT_P (XEXP (op
, 1)))
18857 *amountp
= INTVAL (XEXP (op
, 1));
18859 else if (REG_P (XEXP (op
, 1)))
18866 output_operand_lossage ("invalid shift operand");
18872 /* We never have to worry about the amount being other than a
18873 power of 2, since this case can never be reloaded from a reg. */
18874 if (!CONST_INT_P (XEXP (op
, 1)))
18876 output_operand_lossage ("invalid shift operand");
18880 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18882 /* Amount must be a power of two. */
18883 if (*amountp
& (*amountp
- 1))
18885 output_operand_lossage ("invalid shift operand");
18889 *amountp
= exact_log2 (*amountp
);
18890 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18891 return ARM_LSL_NAME
;
18894 output_operand_lossage ("invalid shift operand");
18898 /* This is not 100% correct, but follows from the desire to merge
18899 multiplication by a power of 2 with the recognizer for a
18900 shift. >=32 is not a valid shift for "lsl", so we must try and
18901 output a shift that produces the correct arithmetical result.
18902 Using lsr #32 is identical except for the fact that the carry bit
18903 is not set correctly if we set the flags; but we never use the
18904 carry bit from such an operation, so we can ignore that. */
18905 if (code
== ROTATERT
)
18906 /* Rotate is just modulo 32. */
18908 else if (*amountp
!= (*amountp
& 31))
18910 if (code
== ASHIFT
)
18915 /* Shifts of 0 are no-ops. */
18922 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18923 because /bin/as is horribly restrictive. The judgement about
18924 whether or not each character is 'printable' (and can be output as
18925 is) or not (and must be printed with an octal escape) must be made
18926 with reference to the *host* character set -- the situation is
18927 similar to that discussed in the comments above pp_c_char in
18928 c-pretty-print.c. */
18930 #define MAX_ASCII_LEN 51
18933 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18936 int len_so_far
= 0;
18938 fputs ("\t.ascii\t\"", stream
);
18940 for (i
= 0; i
< len
; i
++)
18944 if (len_so_far
>= MAX_ASCII_LEN
)
18946 fputs ("\"\n\t.ascii\t\"", stream
);
18952 if (c
== '\\' || c
== '\"')
18954 putc ('\\', stream
);
18962 fprintf (stream
, "\\%03o", c
);
18967 fputs ("\"\n", stream
);
18970 /* Whether a register is callee saved or not. This is necessary because high
18971 registers are marked as caller saved when optimizing for size on Thumb-1
18972 targets despite being callee saved in order to avoid using them. */
18973 #define callee_saved_reg_p(reg) \
18974 (!call_used_regs[reg] \
18975 || (TARGET_THUMB1 && optimize_size \
18976 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18978 /* Compute the register save mask for registers 0 through 12
18979 inclusive. This code is used by arm_compute_save_reg_mask. */
18981 static unsigned long
18982 arm_compute_save_reg0_reg12_mask (void)
18984 unsigned long func_type
= arm_current_func_type ();
18985 unsigned long save_reg_mask
= 0;
18988 if (IS_INTERRUPT (func_type
))
18990 unsigned int max_reg
;
18991 /* Interrupt functions must not corrupt any registers,
18992 even call clobbered ones. If this is a leaf function
18993 we can just examine the registers used by the RTL, but
18994 otherwise we have to assume that whatever function is
18995 called might clobber anything, and so we have to save
18996 all the call-clobbered registers as well. */
18997 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18998 /* FIQ handlers have registers r8 - r12 banked, so
18999 we only need to check r0 - r7, Normal ISRs only
19000 bank r14 and r15, so we must check up to r12.
19001 r13 is the stack pointer which is always preserved,
19002 so we do not need to consider it here. */
19007 for (reg
= 0; reg
<= max_reg
; reg
++)
19008 if (df_regs_ever_live_p (reg
)
19009 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19010 save_reg_mask
|= (1 << reg
);
19012 /* Also save the pic base register if necessary. */
19014 && !TARGET_SINGLE_PIC_BASE
19015 && arm_pic_register
!= INVALID_REGNUM
19016 && crtl
->uses_pic_offset_table
)
19017 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19019 else if (IS_VOLATILE(func_type
))
19021 /* For noreturn functions we historically omitted register saves
19022 altogether. However this really messes up debugging. As a
19023 compromise save just the frame pointers. Combined with the link
19024 register saved elsewhere this should be sufficient to get
19026 if (frame_pointer_needed
)
19027 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19028 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19029 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19030 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19031 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19035 /* In the normal case we only need to save those registers
19036 which are call saved and which are used by this function. */
19037 for (reg
= 0; reg
<= 11; reg
++)
19038 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19039 save_reg_mask
|= (1 << reg
);
19041 /* Handle the frame pointer as a special case. */
19042 if (frame_pointer_needed
)
19043 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19045 /* If we aren't loading the PIC register,
19046 don't stack it even though it may be live. */
19048 && !TARGET_SINGLE_PIC_BASE
19049 && arm_pic_register
!= INVALID_REGNUM
19050 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19051 || crtl
->uses_pic_offset_table
))
19052 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19054 /* The prologue will copy SP into R0, so save it. */
19055 if (IS_STACKALIGN (func_type
))
19056 save_reg_mask
|= 1;
19059 /* Save registers so the exception handler can modify them. */
19060 if (crtl
->calls_eh_return
)
19066 reg
= EH_RETURN_DATA_REGNO (i
);
19067 if (reg
== INVALID_REGNUM
)
19069 save_reg_mask
|= 1 << reg
;
19073 return save_reg_mask
;
19076 /* Return true if r3 is live at the start of the function. */
19079 arm_r3_live_at_start_p (void)
19081 /* Just look at cfg info, which is still close enough to correct at this
19082 point. This gives false positives for broken functions that might use
19083 uninitialized data that happens to be allocated in r3, but who cares? */
19084 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19087 /* Compute the number of bytes used to store the static chain register on the
19088 stack, above the stack frame. We need to know this accurately to get the
19089 alignment of the rest of the stack frame correct. */
19092 arm_compute_static_chain_stack_bytes (void)
19094 /* See the defining assertion in arm_expand_prologue. */
19095 if (IS_NESTED (arm_current_func_type ())
19096 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19097 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19098 && !df_regs_ever_live_p (LR_REGNUM
)))
19099 && arm_r3_live_at_start_p ()
19100 && crtl
->args
.pretend_args_size
== 0)
19106 /* Compute a bit mask of which registers need to be
19107 saved on the stack for the current function.
19108 This is used by arm_get_frame_offsets, which may add extra registers. */
19110 static unsigned long
19111 arm_compute_save_reg_mask (void)
19113 unsigned int save_reg_mask
= 0;
19114 unsigned long func_type
= arm_current_func_type ();
19117 if (IS_NAKED (func_type
))
19118 /* This should never really happen. */
19121 /* If we are creating a stack frame, then we must save the frame pointer,
19122 IP (which will hold the old stack pointer), LR and the PC. */
19123 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19125 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19128 | (1 << PC_REGNUM
);
19130 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19132 /* Decide if we need to save the link register.
19133 Interrupt routines have their own banked link register,
19134 so they never need to save it.
19135 Otherwise if we do not use the link register we do not need to save
19136 it. If we are pushing other registers onto the stack however, we
19137 can save an instruction in the epilogue by pushing the link register
19138 now and then popping it back into the PC. This incurs extra memory
19139 accesses though, so we only do it when optimizing for size, and only
19140 if we know that we will not need a fancy return sequence. */
19141 if (df_regs_ever_live_p (LR_REGNUM
)
19144 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19145 && !crtl
->tail_call_emit
19146 && !crtl
->calls_eh_return
))
19147 save_reg_mask
|= 1 << LR_REGNUM
;
19149 if (cfun
->machine
->lr_save_eliminated
)
19150 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19152 if (TARGET_REALLY_IWMMXT
19153 && ((bit_count (save_reg_mask
)
19154 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19155 arm_compute_static_chain_stack_bytes())
19158 /* The total number of registers that are going to be pushed
19159 onto the stack is odd. We need to ensure that the stack
19160 is 64-bit aligned before we start to save iWMMXt registers,
19161 and also before we start to create locals. (A local variable
19162 might be a double or long long which we will load/store using
19163 an iWMMXt instruction). Therefore we need to push another
19164 ARM register, so that the stack will be 64-bit aligned. We
19165 try to avoid using the arg registers (r0 -r3) as they might be
19166 used to pass values in a tail call. */
19167 for (reg
= 4; reg
<= 12; reg
++)
19168 if ((save_reg_mask
& (1 << reg
)) == 0)
19172 save_reg_mask
|= (1 << reg
);
19175 cfun
->machine
->sibcall_blocked
= 1;
19176 save_reg_mask
|= (1 << 3);
19180 /* We may need to push an additional register for use initializing the
19181 PIC base register. */
19182 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19183 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19185 reg
= thumb_find_work_register (1 << 4);
19186 if (!call_used_regs
[reg
])
19187 save_reg_mask
|= (1 << reg
);
19190 return save_reg_mask
;
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
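
/* A self-contained model of the size computation above: scan D-register
   slots, accumulate maximal runs of live registers, and pad any run of
   exactly two on pre-v6 cores (the ARM10 VFPr1 workaround).  Guarded out
   of the build with #if 0; LIVE is a hypothetical bitmap indexed by
   D-register number, not a GCC data structure.  */
#if 0
static int
sketch_vfp_saved_size (const unsigned char *live, int n_dregs, int arch6)
{
  int count = 0, saved = 0, i;

  for (i = 0; i < n_dregs; i++)
    if (!live[i])
      {
	if (count > 0)
	  {
	    if (count == 2 && !arch6)
	      count++;		/* ARM10 VFPr1 workaround.  */
	    saved += count * 8;	/* 8 bytes per D-register.  */
	  }
	count = 0;
      }
    else
      count++;

  if (count > 0)
    {
      if (count == 2 && !arch6)
	count++;
      saved += count * 8;
    }
  return saved;
}
#endif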
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && !IS_CMSE_ENTRY (func_type)
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		}
	    }
	  /* For interrupt returns we have to use an LDM rather than
	     a POP so that we can use the exception return variant.  */
	  else if (IS_INTERRUPT (func_type))
	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
	  else
	    sprintf (instr, "pop%s\t{", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  gcc_assert (arm_arch5 || arm_arch4t);
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  if (IS_CMSE_ENTRY (func_type))
	    {
	      /* Check if we have to clear the 'GE bits' which is only used if
		 parallel add and subtraction instructions are available.  */
	      if (TARGET_INT_SIMD)
		snprintf (instr, sizeof (instr),
			  "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
	      else
		snprintf (instr, sizeof (instr),
			  "msr%s\tAPSR_nzcvq, %%|lr", conditional);

	      output_asm_insn (instr, & operand);
	      if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
		{
		  /* Clear the cumulative exception-status bits (0-4,7) and the
		     condition code bits (28-31) of the FPSCR.  We need to
		     remember to clear the first scratch register used (IP) and
		     save and restore the second (r4).  */
		  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
		  output_asm_insn (instr, & operand);
		}
	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
	    }
	  /* Use bx if it's available.  */
	  else if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
				    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location whose length is ((pc[-3]) & 0x00ffffff).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
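
/* As a cross-check of the layout emitted above, a hedged sketch of what
   a backtrace reader could do with a stored pc value.  Guarded out of
   the build with #if 0; sketch_find_poked_name is hypothetical and not
   part of any runtime support library.  */
#if 0
/* Given PC as saved in a backtrace structure, return the embedded
   function name, or NULL if the marker word is absent.  Mirrors the
   layout emitted by arm_poke_function_name: the marker word sits at
   pc - 12 and the name occupies the (word-aligned) bytes just before
   it.  */
static const char *
sketch_find_poked_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];

  if ((marker & 0xff000000) != 0xff000000)
    return NULL;

  /* The low 24 bits hold the aligned length of the name.  */
  return (const char *) pc - 12 - (marker & 0x00ffffff);
}
#endif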
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);
  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs;
  int i = 0;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    continue;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	unsigned regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  continue;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* The first STRD also performs the stack allocation.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    /* Subsequent STRDs use offset addressing from the new SP.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);
	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
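
/* An illustrative dry run of the schedule the function above produces:
   one initial STR with writeback when the register count is odd, then
   STRDs at increasing word offsets.  Guarded out of the build with
   #if 0; sketch_strd_push_plan is hypothetical and only prints the
   would-be instructions.  */
#if 0
#include <stdio.h>

static void
sketch_strd_push_plan (unsigned long mask)
{
  int num_regs = __builtin_popcountl (mask);
  int i = 0, regno = 0;

  if (num_regs & 1)
    {
      while ((mask & (1UL << regno)) == 0)
	regno++;
      /* Odd count: a single STR allocates nothing extra but keeps the
	 following STRDs doubleword aligned.  */
      printf ("str r%d, [sp, #%d]!\n", regno++, -4 * num_regs);
      i++;
    }
  while (i < num_regs)
    {
      int r1, r2;
      while ((mask & (1UL << regno)) == 0)
	regno++;
      r1 = regno++;
      while ((mask & (1UL << regno)) == 0)
	regno++;
      r2 = regno++;
      if (i == 0)
	/* The first STRD performs the whole stack allocation.  */
	printf ("strd r%d, r%d, [sp, #%d]!\n", r1, r2, -4 * num_regs);
      else
	printf ("strd r%d, r%d, [sp, #%d]\n", r1, r2, 4 * i);
      i += 2;
    }
}
#endif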
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   in scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
				        (const_int:SI <num>)))
	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
	     instead we'd have a parallel expression detailing all
	     the stores to the various memory addresses so that debug
	     information is more up-to-date.  Remember however while writing
	     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
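
/* A hedged usage sketch for the function above, guarded out of the
   build with #if 0.  It shows the common case where every pushed
   register is also annotated for the unwinder; a caller that pushes a
   register purely as alignment padding would instead clear that
   register's bit in the DWARF mask.  sketch_push_r4_to_r7 is
   hypothetical.  */
#if 0
static void
sketch_push_r4_to_r7 (void)
{
  unsigned long mask = (1UL << 4) | (1UL << 5) | (1UL << 6) | (1UL << 7);

  /* Same mask twice: all four saves appear in the frame note.  */
  emit_multi_reg_push (mask, mask);
}
#endif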
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
					  plus_constant (Pmode,
							 stack_pointer_rtx,
							 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);
	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
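
/* An illustrative dry run of the pairing rule used above: ARM-mode
   LDRD needs an even-numbered register paired with its successor.
   Guarded out of the build with #if 0; sketch_ldrd_pop_plan is
   hypothetical and only prints the would-be epilogue (ignoring the PC
   special case).  */
#if 0
#include <stdio.h>

static void
sketch_ldrd_pop_plan (unsigned long mask)
{
  int j = 0, offset = 0;

  while (j <= 14)
    if (mask & (1UL << j))
      {
	if ((j % 2) == 0 && (mask & (1UL << (j + 1))))
	  {
	    printf ("ldrd r%d, r%d, [sp, #%d]\n", j, j + 1, offset);
	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    printf ("ldr r%d, [sp, #%d]\n", j, offset);
	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* One stack update at the end, rather than writeback on each load.  */
  if (offset > 0)
    printf ("add sp, sp, #%d\n", offset);
}
#endif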
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
21156 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21158 #if PROBE_INTERVAL > 4096
21159 #error Cannot use indexed addressing mode for stack probing
21162 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21163 inclusive. These are offsets from the current stack pointer. REGNO1
21164 is the index number of the 1st scratch register and LIVE_REGS is the
21165 mask of live registers. */
21168 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21169 unsigned int regno1
, unsigned long live_regs
)
21171 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21173 /* See if we have a constant small number of probes to generate. If so,
21174 that's the easy case. */
21175 if (size
<= PROBE_INTERVAL
)
21177 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21178 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21179 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21182 /* The run-time loop is made up of 10 insns in the generic case while the
21183 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21184 else if (size
<= 5 * PROBE_INTERVAL
)
21186 HOST_WIDE_INT i
, rem
;
21188 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21189 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21190 emit_stack_probe (reg1
);
21192 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21193 it exceeds SIZE. If only two probes are needed, this will not
21194 generate any code. Then probe at FIRST + SIZE. */
21195 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21197 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21198 emit_stack_probe (reg1
);
21201 rem
= size
- (i
- PROBE_INTERVAL
);
21202 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21204 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21205 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21208 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
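/* An illustrative sketch, not part of the original source: assuming
   PROBE_INTERVAL == 4096, a call with FIRST == 4096 and SIZE == 12288
   falls into the unrolled (second) case above and emits roughly

	mov	ip, #8192
	sub	ip, sp, ip
	str	r0, [ip]		@ probe at sp - (FIRST + PROBE_INTERVAL)
	sub	ip, ip, #4096
	str	r0, [ip]		@ probe at sp - (FIRST + 2 * PROBE_INTERVAL)
	sub	ip, ip, #4096
	str	r0, [ip]		@ probe at sp - (FIRST + SIZE)

   while a large or non-constant SIZE uses the run-time loop emitted
   through gen_probe_stack_range.  Register choice and exact literals
   depend on the surrounding code.  */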
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
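/* For reference, the loop written out above looks like this in the
   assembly output (a sketch; the label number and the two range
   registers vary, PROBE_INTERVAL assumed to be 4096):

	.LPSRL0:
		sub	ip, ip, #4096	@ TEST_ADDR -= PROBE_INTERVAL
		str	r0, [ip, #0]	@ probe at TEST_ADDR
		cmp	ip, r4		@ TEST_ADDR == LAST_ADDR?
		bne	.LPSRL0
*/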
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;
  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov r1, sp
	  mov sp, r0

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }
  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		   || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
		       && !df_regs_ever_live_p (LR_REGNUM)
		       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
	  saved_regs += 4;

	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	}
    }
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when a frame is needed and the frame layout is
     APCS), then subtracting four from LR now will mean that the
     function return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }
  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
					size - STACK_CHECK_PROTECT,
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
				    regno, live_regs_mask);
    }
  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }
  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }
  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
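/* A worked example (a sketch only, not authoritative output): for a small
   ARM-state function compiled with an APCS frame, the code above typically
   produces a prologue of the shape

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}	@ push of live_regs_mask
	sub	fp, ip, #4		@ create the APCS frame pointer
	sub	sp, sp, #SIZE		@ allocate the outgoing args area

   with the exact register mask and SIZE taken from arm_get_frame_offsets.  */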
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU

   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc ('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc ('s', stream);
      return;
    case '#':
      /* %# is a "break" sequence.  It doesn't output anything, but is used to
	 separate e.g. operand numbers from following text, if that text
	 consists of further digits which we don't want to be part of the
	 operand number.  */
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;
    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;
    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;
    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;
    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;
    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
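      /* Example (added for illustration): given (reg:DI r0) on a
	 little-endian target, '%Q' prints "r0" (least significant word),
	 '%R' prints "r1" (most significant word) and '%H' prints "r1"
	 (the higher register number).  On a WORDS_BIG_ENDIAN target,
	 '%Q' prints "r1" and '%R' prints "r0", while '%H' still prints
	 "r1".  */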
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;
    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;
    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;
    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;
    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;
    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;
    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;
      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;
      /* Print the high single-precision register of a VFP double-precision
	 register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;
    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;
    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;
    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;
      /* Print bits representing opcode features for Neon.

	 Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
	 and polynomials as unsigned.

	 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

	 Bit 2 is 1 for rounding functions, 0 otherwise.  */
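      /* Example (added for illustration): a pattern passing (const_int 3)
	 for '%T' prints 'f' (bits 0 and 1 set: signed float), while
	 (const_int 0) prints 'u'.  For '%O', (const_int 4) selects the
	 'r' rounding spelling, turning e.g. vaddhn into vraddhn.  */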
      /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

      /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

      /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

      /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;
    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP (XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs ("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;
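    /* Example of the resulting operand syntax (a sketch): a 16-byte
       vld1 access through r0 whose alignment is known to be 128 bits
       prints as "[r0:128]", a post-incremented one as "[r0:128]!",
       and a POST_MODIFY by a register as "[r0:128], r2".  With no
       usable alignment information the hint is simply omitted and
       the operand prints as "[r0]".  */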
    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;
    /* Translate an S register number into a D register number and element
       index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (CONST_DOUBLE_P (x));
      int result;
      result = vfp3_const_double_for_fract_bits (x);
      if (result == 0)
	result = vfp3_const_double_for_bits (x);
      fprintf (stream, "#%d", result);
      return;
    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;
22377 output_operand_lossage ("missing operand");
22381 switch (GET_CODE (x
))
22384 asm_fprintf (stream
, "%r", REGNO (x
));
22388 output_address (GET_MODE (x
), XEXP (x
, 0));
22394 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22395 sizeof (fpstr
), 0, 1);
22396 fprintf (stream
, "#%s", fpstr
);
22401 gcc_assert (GET_CODE (x
) != NEG
);
22402 fputc ('#', stream
);
22403 if (GET_CODE (x
) == HIGH
)
22405 fputs (":lower16:", stream
);
22409 output_addr_const (stream
, x
);
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
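/* These punctuation codes appear in the output templates of the .md
   files.  For instance, a template like "add%?%.\t%0, %1, %2" (an
   illustrative pattern, not necessarily one found verbatim in arm.md)
   would expand '%?' to the current condition and '%.' to the 's'
   flag-setting marker, both handled by arm_print_operand above.  */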
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  /* References to weak symbols cannot be resolved locally:
	     they may be overridden by a non-weak definition at link
	     time.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF
		  && (!SYMBOL_REF_LOCAL_P (x)
		      || (SYMBOL_REF_DECL (x)
			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_real
	      (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
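/* For an AAPCS target, a constructor registered at the default priority
   therefore ends up as (a sketch, with an illustrative symbol name):

	.section	.init_array
	.align	2
	.word	_GLOBAL__sub_I_foo(target1)

   where the (target1) relocation lets the linker emit either an absolute
   or a PC-relative address as the platform requires.  */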
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
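/* A short worked example of the 0 -> 1 -> 3 -> 0 path (a sketch only):

	cmp	r0, #0
	beq	.L1		@ final_prescan_insn: state 0 -> 1
	add	r1, r1, #1	@ ASM_OUTPUT_OPCODE suppresses the beq and
   .L1:			@ goes to state 3: emitted as "addne r1, r1, #1"
				@ internal_label: state 3 -> 0

   i.e. the branch is deleted and the skipped instruction is predicated
   with the inverse of the branch condition.  */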
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	case NE: return ARM_CS;
	case EQ: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
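/* Example (derived from the code above): with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 3 and arm_condexec_mask == 0x3 (binary 011),
   the loop builds "tte" and the function emits

	itte	eq

   so the first two instructions of the block execute if EQ holds and
   the third executes if NE holds.  */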
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      /* VFP registers can hold HImode values.  */
      if (mode == HImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return FALSE;

      if (TARGET_THUMB2)
	return TRUE;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
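
/* Illustrative sketch, not part of GCC (kept out of the build by #if 0;
   compile it separately as a standalone program).  A C99 program can
   observe the decision made by the hook above through FLT_EVAL_METHOD
   from <float.h>: with the ARMv8.2-A FP16 instructions and TS 18661-3
   semantics this would read 16, otherwise 0.  */
#if 0
#include <float.h>
#include <stdio.h>

int
main (void)
{
  printf ("FLT_EVAL_METHOD = %d\n", (int) FLT_EVAL_METHOD);
  return 0;
}
#endif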
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static machine_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
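
/* Illustrative sketch, not part of GCC (kept out of the build by #if 0;
   compile it separately): the same forward/backward ordering trick on
   plain arrays.  When source and destination windows overlap, copying in
   the direction away from the overlap never reads a slot that has already
   been overwritten, which is why neon_disambiguate_copy reverses the
   component order when REGNO (operands[0]) >= REGNO (operands[1]).  */
#if 0
#include <stdio.h>

static void
ordered_copy (int *regs, unsigned dest, unsigned src, unsigned count)
{
  unsigned i;

  if (dest < src)
    for (i = 0; i < count; i++)        /* Forward: dest below src.  */
      regs[dest + i] = regs[src + i];
  else
    for (i = count; i-- > 0; )         /* Backward: dest above src.  */
      regs[dest + i] = regs[src + i];
}

int
main (void)
{
  int regs[6] = { 10, 11, 12, 13, 14, 15 };

  ordered_copy (regs, 1, 0, 3);        /* Overlapping copy upwards.  */
  printf ("%d %d %d\n", regs[1], regs[2], regs[3]);  /* 10 11 12 */
  return 0;
}
#endif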
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
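
/* Illustrative sketch, not part of GCC (kept out of the build by #if 0;
   compile it separately): the two bit idioms used throughout this file.
   "mask & -mask" isolates the lowest set bit, and "mask &= mask - 1"
   clears it, so the loop below visits each set register number exactly
   once, lowest first, just like the loops in thumb1_emit_multi_reg_push.
   __builtin_ctzl stands in for ctz_hwi here.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long mask = 0x40f1;  /* r0, r4-r7 and lr, as a Thumb-1 push mask.  */

  for (; mask; mask &= mask - 1)
    printf ("r%d\n", __builtin_ctzl (mask));
  return 0;
}
#endif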
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
		       reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register for the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in 'cmse_nonsecure_entry_clear_before_return'.  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

    /* Check if unexpected far jump is used.  */
    if (cfun->machine->lr_save_eliminated
	&& get_attr_far_jump (insn) == FAR_JUMP_YES)
      internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative as there
     is no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
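
/* Illustrative sketch, not part of GCC (kept out of the build by #if 0;
   compile it separately): the arithmetic behind the 2048/3 threshold.
   With a worst case of one 4-byte literal-pool entry for every 2-byte
   insn, FUNC_SIZE bytes of insns can span up to 3 * FUNC_SIZE bytes of
   code and data, and the conditional branch range is only about 2 KiB.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int func_size = 700;                  /* bytes of insns */
  unsigned int worst_case_span = func_size * 3;  /* insns + literal pools */

  printf ("may need far jumps: %s\n",
	  worst_case_span >= 2048 ? "yes" : "no");
  return 0;
}
#endif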
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
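
/* Illustrative sketch, not part of GCC (kept out of the build by #if 0;
   compile it separately): the payoff computed above.  SUB SP, #imm on
   Thumb-1 encodes at most #508 (a 7-bit word count), so a 512-byte frame
   needs either a literal load or one extra pushed register;
   (512 - 508) / 4 == 1 extra push covers the difference.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int amount = 512;  /* frame bytes to allocate */
  int n_free = 2;    /* low registers free to push as padding */

  if (amount >= 512 && (amount - n_free * 4) < 512)
    printf ("push %d extra register(s), then sub sp, #508\n",
	    (amount - 508) / 4);
  return 0;
}
#endif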
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Push low registers.
	 4     add   R7, SP, #20     Get the stack pointer before the push.
	 6     str   R7, [SP, #8]    Store the stack pointer
					(before reserving the space).
	 8     mov   R7, PC          Get hold of the start of this code + 12.
	10     str   R7, [SP, #16]   Store it.
	12     mov   R7, FP          Get hold of the current frame pointer.
	14     str   R7, [SP, #4]    Store it.
	16     mov   R7, LR          Get hold of the current return address.
	18     str   R7, [SP, #12]   Store it.
	20     add   R7, SP, #16     Point at the start of the
					backtrace structure.
	22     mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  lr_needs_saving = false;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to stash
	 the high registers.  Such a stash may clobber the use of
	 arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      if (lr_needs_saving)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
    sorry ("-fstack-check=specific for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  uint64_t to_clear_mask[2];
  uint32_t padding_bits_to_clear = 0;
  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
  int regno, maxregno = IP_REGNUM;
  tree result_type;
  rtx result_rtl;

  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
  to_clear_mask[0] |= (1ULL << IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
     to make sure the instructions used to clear them are present.  */
  if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
    {
      uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
      maxregno = LAST_VFP_REGNUM;

      float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
      to_clear_mask[0] |= float_mask;

      float_mask = (1ULL << (maxregno - 63)) - 1;
      to_clear_mask[1] = float_mask;

      /* Make sure we don't clear the two scratch registers used to clear the
	 relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      to_clear_mask[0] &= ~(1ULL << 4);
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (call_used_regs[regno])
	to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      to_clear_mask[0]
	&= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
				       padding_bits_to_clear_ptr);
    }

  if (padding_bits_to_clear != 0)
    {
      rtx reg_rtx;
      /* Padding bits to clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too, we will use r1 as a scratch register.  */
      gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);

      reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);

      /* Fill the lower half of the negated padding_bits_to_clear.  */
      emit_move_insn (reg_rtx,
		      GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));

      /* Also fill the top half of the negated padding_bits_to_clear.  */
      if (((~padding_bits_to_clear) >> 16) > 0)
	emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
						      GEN_INT (16),
						      GEN_INT (16)),
				GEN_INT ((~padding_bits_to_clear) >> 16)));

      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
			     gen_rtx_REG (SImode, R0_REGNUM),
			     reg_rtx));
    }

  for (regno = R0_REGNUM; regno <= maxregno; regno++)
    {
      if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
	continue;

      if (IS_VFP_REGNUM (regno))
	{
	  /* If regno is an even vfp register and its successor is also to
	     be cleared, use vmov.  */
	  if (TARGET_VFP_DOUBLE
	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
	      && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
	    {
	      emit_move_insn (gen_rtx_REG (DFmode, regno),
			      CONST1_RTX (DFmode));
	      emit_use (gen_rtx_REG (DFmode, regno));
	      regno++;
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SFmode, regno),
			      CONST1_RTX (SFmode));
	      emit_use (gen_rtx_REG (SFmode, regno));
	    }
	}
      else
	{
	  if (TARGET_THUMB1)
	    {
	      if (regno == R0_REGNUM)
		emit_move_insn (gen_rtx_REG (SImode, regno),
				const0_rtx);
	      else
		/* R0 has either been cleared before, see code above, or it
		   holds a return value, either way it is not secret
		   information.  */
		emit_move_insn (gen_rtx_REG (SImode, regno),
				gen_rtx_REG (SImode, R0_REGNUM));
	      emit_use (gen_rtx_REG (SImode, regno));
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SImode, regno),
			      gen_rtx_REG (SImode, LR_REGNUM));
	      emit_use (gen_rtx_REG (SImode, regno));
	    }
	}
    }
}
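
/* Illustrative sketch, not part of GCC (kept out of the build by #if 0;
   compile it separately): the shift pair used above to split the negated
   padding mask into MOV-able halves.  On a 32-bit unsigned value,
   (x << 16) >> 16 keeps the low 16 bits and x >> 16 the high 16, which
   is what the two-instruction immediate-building sequence needs.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t padding_bits_to_clear = 0x00f000f0;
  uint32_t neg = ~padding_bits_to_clear;

  printf ("low  half: 0x%04x\n", (unsigned) ((neg << 16) >> 16));
  printf ("high half: 0x%04x\n", (unsigned) (neg >> 16));
  return 0;
}
#endif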
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~ (1 << LR_REGNUM);
	  saved_regs_mask |=   (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i, j;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
        {
          /* In ARM mode, frame pointer points to first saved register.
             Restore stack pointer to last saved register.  */
          amount = offsets->frame - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (insn, amount,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);

          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
      else
        {
          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
          amount = offsets->locals_base - offsets->saved_regs;
          if (amount)
            {
              insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                            hard_frame_pointer_rtx,
                                            GEN_INT (amount)));
              arm_add_cfa_adjust_cfa_note (insn, amount,
                                           hard_frame_pointer_rtx,
                                           hard_frame_pointer_rtx);
            }

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_movsi (stack_pointer_rtx,
                                       hard_frame_pointer_rtx));
          arm_add_cfa_adjust_cfa_note (insn, 0,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
         last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
        {
          rtx_insn *tmp;
          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx,
                                       GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (tmp, amount,
                                       stack_pointer_rtx, stack_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
         any groupings made in the prologue and generate matching
         vldm operations.  The need to match groups is because,
         unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
          {
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
            if (end_reg > i + 2)
              arm_emit_vfp_multi_reg_pop (i + 2,
                                          (end_reg - (i + 2)) / 2,
                                          stack_pointer_rtx);
            end_reg = i;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (end_reg > i + 2)
        arm_emit_vfp_multi_reg_pop (i + 2,
                                    (end_reg - (i + 2)) / 2,
                                    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
        {
          rtx_insn *insn;
          rtx addr = gen_rtx_MEM (V2SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                             gen_rtx_REG (V2SImode, i),
                                             NULL_RTX);
          arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_CMSE_ENTRY (func_type)
          && !IS_STACKALIGN (func_type)
          && really_return
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return)
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          return_in_pc = true;
        }

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
        {
          for (i = 0; i <= LAST_ARM_REGNUM; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx addr = gen_rtx_MEM (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx));
                set_mem_alias_set (addr, get_frame_alias_set ());

                if (i == PC_REGNUM)
                  {
                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
                    XVECEXP (insn, 0, 0) = ret_rtx;
                    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
                                                        addr);
                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
                    insn = emit_jump_insn (insn);
                  }
                else
                  {
                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                                                 addr));
                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                       gen_rtx_REG (SImode, i),
                                                       NULL_RTX);
                    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                                 stack_pointer_rtx,
                                                 stack_pointer_rtx);
                  }
              }
        }
      else
        {
          if (TARGET_LDRD
              && current_tune->prefer_ldrd_strd
              && !optimize_function_for_size_p (cfun))
            {
              if (TARGET_THUMB2)
                thumb2_emit_ldrd_pop (saved_regs_mask);
              else if (TARGET_ARM && !IS_INTERRUPT (func_type))
                arm_emit_ldrd_pop (saved_regs_mask);
              else
                arm_emit_multi_reg_pop (saved_regs_mask);
            }
          else
            arm_emit_multi_reg_pop (saved_regs_mask);
        }

      if (return_in_pc)
        return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
        = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                 stack_pointer_rtx,
                                 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
        {
          /* Restore pretend args.  Refer arm_expand_prologue on how to save
             pretend_args in stack.  */
          int num_regs = crtl->args.pretend_args_size / 4;
          saved_regs_mask = (0xf0 >> num_regs) & 0xf;
          for (j = 0, i = 0; j < num_regs; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx reg = gen_rtx_REG (SImode, i);
                dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
                j++;
              }
          REG_NOTES (tmp) = dwarf;
        }
      arm_add_cfa_adjust_cfa_note (tmp, amount,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
              == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
        {
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
        std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
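/* Note on the conditional swaps above: for n == 3 the three
   compare-and-swap steps form a small sorting network that leaves
   operands[4..6] in ascending register order, which ldmia/stmia require
   for their register lists.  As an illustrative case (not tied to any
   particular insn), scratch registers r5, r2, r4 would be reordered to
   r2, r4, r5 before the lists are printed.  */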
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
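/* For example, arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) emits

        .eabi_attribute 28, 1   @ Tag_ABI_VFP_args

   where the trailing comment appears only under -fverbose-asm or -dA, and
   '@' stands in for whatever ASM_COMMENT_START expands to on the target.  */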
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
               current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_size:\t%d\n",
               current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_line_size:\t%d\n",
               current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_constant_pool:\t%d\n",
               (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
               current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
               current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
               current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
               current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_ldrd_strd:\t%d\n",
               (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "logical_op_non_short_circuit:\t[%d,%d]\n",
               (int) current_tune->logical_op_non_short_circuit_thumb,
               (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_neon_for_64bits:\t%d\n",
               (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "disparage_flag_setting_t16_encodings:\t%d\n",
               (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "string_ops_prefer_neon:\t%d\n",
               (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_inline_memset:\t%d\n",
               current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
               current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
               (int) current_tune->sched_autopref);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
         generate the tags.

         Note: it might be better to do this unconditionally, then the
         assembler would not need to know about all new CPU names as
         they are added.  */
      if (!arm_active_target.core_name)
        {
          /* armv7ve doesn't support any extensions.  */
          if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
            {
              /* Keep backward compatibility for assemblers
                 which don't support armv7ve.  */
              asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
              asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
              asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
              asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
              asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
            }
          else
            {
              const char* pos = strchr (arm_active_target.arch_name, '+');
              if (pos)
                {
                  char buf[32];
                  gcc_assert (strlen (arm_active_target.arch_name)
                              <= sizeof (buf) / sizeof (*pos));
                  strncpy (buf, arm_active_target.arch_name,
                           (pos - arm_active_target.arch_name) * sizeof (*pos));
                  buf[pos - arm_active_target.arch_name] = '\0';
                  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
                  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
                }
              else
                asm_fprintf (asm_out_file, "\t.arch %s\n",
                             arm_active_target.arch_name);
            }
        }
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
        asm_fprintf (asm_out_file, "\t.arch %s\n",
                     arm_active_target.core_name + 8);
      else
        {
          const char* truncated_name
            = arm_rewrite_selected_cpu (arm_active_target.core_name);
          asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
        }

      if (print_tune_info)
        arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
        {
          if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
            arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

          if (TARGET_HARD_FLOAT_ABI)
            arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
        }

      /* Some of these attributes only apply when the corresponding features
         are used.  However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
        arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
        {
          arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
          arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
        }
      if (flag_signaling_nans)
        arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
                               flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
                               flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
                               unaligned_access);

      if (arm_fp16_format)
        arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
                                 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
        arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */

static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
                    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);
          fputs ("\tldr\tr3, ", file);
        }
      else
        fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          fputs ("\tldr\tr3, ", file);
          assemble_name (file, label);
          fputs ("+4\n", file);
          asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          /* Thumb1 unified syntax requires s suffix in instruction name when
             one of the operands is immediate.  */
          asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
        {
          /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
          rtx tem = XEXP (DECL_RTL (function), 0);
          /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
             pipeline offset is four rather than eight.  Adjust the offset
             accordingly.  */
          tem = plus_constant (GET_MODE (tem), tem,
                               TARGET_THUMB1_ONLY ? -3 : -7);
          tem = gen_rtx_MINUS (GET_MODE (tem),
                               tem,
                               gen_rtx_SYMBOL_REF (Pmode,
                                                   ggc_strdup (labelpc)));
          assemble_integer (tem, 4, BITS_PER_WORD, 1);
        }
      else
        /* Output ".word .LTHUNKn".  */
        assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
        assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                       HOST_WIDE_INT vcall_offset, tree function)
{
  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
                                       function)
                    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
                        delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
                          false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}

/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}
bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
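/* For example, a V2SImode CONST_VECTOR holding { 1, 2 } prints as
   0x0000000200000001: the loop walks elements from the highest index
   down, so element 1 is emitted first, each formatted with the per-mode
   width selected above.  Illustrative values only.  */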
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
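/* For example, the HFmode constant 1.0 (bit pattern 0x3c00) occupies a full
   word in the pool: the two payload bytes followed by two bytes of zero
   padding on a little-endian target, or the padding first when
   WORDS_BIG_ENDIAN.  Illustrative layout; the ldr that loads the entry
   simply ignores the pad.  */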
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
                            machine_mode mode,
                            tree type,
                            int *pretend_size,
                            int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
        {
          int res = arm_needs_doubleword_align (mode, type);
          if (res < 0 && warn_psabi)
            inform (input_location, "parameter passing for argument of "
                    "type %qT changed in GCC 7.1", type);
          else if (res > 0)
            nregs++;
        }
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
    return !TARGET_AAPCS_BASED;
}
static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
        addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
        {
          /* LR will be the first saved register.  */
          delta = offsets->outgoing_args - (offsets->frame + 4);

          if (delta >= 4096)
            {
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
                                     GEN_INT (delta & ~4095)));
              addr = scratch;
              delta &= 4095;
            }
          else
            addr = stack_pointer_rtx;

          addr = plus_constant (Pmode, addr, delta);
        }

      /* The store needs to be marked as frame related in order to prevent
         DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
          if (TARGET_THUMB1)
            limit = 128;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
         DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
                            unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
        if (!TARGET_NEON_VECTORIZE_DOUBLE)
          return V2DImode;
        break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
        return V2SImode;
      case HImode:
        return V4HImode;
      case QImode:
        return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
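/* Concretely: for SImode the mask of 255 tells the middle-end that
   "x << (n & 255)" behaves the same as "x << n", so the explicit AND may
   be dropped to match the hardware's modulo-256 shifter; for DImode the
   mask of 0 promises nothing, so any such AND must stay.  */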
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
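/* For example (assuming the usual VFP register numbering): s0 maps to
   DWARF register 64 and s31 to 95, while a double-only register such as
   d16 falls in the 256+ range.  Registers with no DWARF encoding
   deliberately return DWARF_FRAME_REGISTERS, i.e. "no DWARF number".  */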
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
        if (TARGET_BIG_END)
          {
            parts[i] = gen_rtx_REG (SImode, regno + i + 1);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i);
          }
        else
          {
            parts[i] = gen_rtx_REG (SImode, regno + i);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
          }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
        parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
              && REG_P (SET_DEST (e))
              && REGNO (SET_DEST (e)) == SP_REGNUM
              && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
         avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
        padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
        fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
                  && MEM_P (SET_DEST (e))
                  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
        {
          /* Check that the addresses are consecutive.  */
          e = XEXP (SET_DEST (e), 0);
          if (GET_CODE (e) == PLUS)
            gcc_assert (REG_P (XEXP (e, 0))
                        && REGNO (XEXP (e, 0)) == SP_REGNUM
                        && CONST_INT_P (XEXP (e, 1))
                        && offset == INTVAL (XEXP (e, 1)));
          else
            gcc_assert (i == 1
                        && REG_P (e)
                        && REGNO (e) == SP_REGNUM);
          offset += reg_size;
        }
    }

  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
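/* As an illustrative example, a prologue push of {r4, r5, lr} is annotated
   here as

        .save {r4, r5, lr}

   a VFP store of d8/d9 as ".vsave {d8, d9}", and a trailing pc push as a
   separate ".pad #4".  The actual directives depend on the SEQUENCE being
   annotated.  */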
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || !REG_P (XEXP (XEXP (e0, 0), 0))
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf(asm_out_file, "{d%d}\n",
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || !REG_P (XEXP (e1, 0))
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || !CONST_INT_P (XEXP (e1, 1)))
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (!REG_P (XEXP (e1, 0))
                  || !CONST_INT_P (XEXP (e1, 1)))
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (REG_P (e1))
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && REG_P (XEXP (e1, 0))
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && CONST_INT_P (XEXP (e1, 1)))
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
        {
        case REG_FRAME_RELATED_EXPR:
          pat = XEXP (note, 0);
          goto found;

        case REG_CFA_REGISTER:
          pat = XEXP (note, 0);
          if (pat == NULL)
            {
              pat = PATTERN (insn);
              if (GET_CODE (pat) == PARALLEL)
                pat = XVECEXP (pat, 0, 0);
            }

          /* Only emitted for IS_STACKALIGN re-alignment.  */
          {
            rtx dest, src;
            unsigned reg;

            src = SET_SRC (pat);
            dest = SET_DEST (pat);

            gcc_assert (src == stack_pointer_rtx);
            reg = REGNO (dest);
            asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                         reg + 0x90, reg);
          }
          handled_one = true;
          break;

        /* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
           to get correct dwarf information for shrink-wrap.  We should not
           emit unwind information for it because these are used either for
           pretend arguments or notes to adjust sp and restore registers from
           stack.  */
        case REG_CFA_DEF_CFA:
        case REG_CFA_ADJUST_CFA:
        case REG_CFA_RESTORE:
          return;

        case REG_CFA_EXPRESSION:
        case REG_CFA_OFFSET:
          /* ??? Only handling here what we actually emit.  */
          gcc_unreachable ();

        default:
          break;
        }
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      break;
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
        operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
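/* For example, for an ASHIFT whose shift_op mnemonic is "lsl" and
   set_flags == 0, the template built here is "lsl%?\t%0, %1, %2"; the
   '%?' punctuation is later expanded by the operand printer into the
   conditional suffix, and %2 holds the shift amount when shift_op
   extracted an immediate.  A NULL shift_op degenerates to a plain mov.
   Illustrative only.  */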
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
        {
          sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
          output_asm_insn (templ, operands);
          if (opmode == DImode)
            {
              sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
              output_asm_insn (templ, operands);
            }
        }
      else
        {
          /* The destination register will contain all zeros.  */
          sprintf (templ, "wzero\t%%0");
          output_asm_insn (templ, operands);
        }
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
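/* Worked example: a DImode shift by 40 exceeds the single-instruction
   range, so the branch above splits it into two steps through the same
   mnemonic, first #32 and then #8.  A fully out-of-range logical shift
   instead zeroes the destination with wzero.  */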
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
        break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  switch (GET_MODE (operands[0]))
    {
    case V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (templ, operands);

  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_SCHED_ISSUE_RATE.  Look up the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
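/* As an illustration (example only): on an AAPCS-based target a C++
   declaration such as

     void f (__builtin_va_list ap);

   is mangled as _Z1fSt9__va_list, exactly as if va_list were
   std::__va_list, which is what the "St9__va_list" string above
   produces.  */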
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
     list is 8-bit.  Normally this means all registers in the list must be
     LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we must use
     32-bit encodings.  There is one exception for PUSH: LR in HI_REGS can be
     used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
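/* As an illustration (example only): under Thumb-2,
   "push {r0-r7, lr}" contains only low registers plus LR, so the check
   above allows the 2-byte encoding, while "push {r0, r8}" contains a
   high register other than LR and therefore needs the 4-byte
   encoding.  */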
/* Compute the attribute "length" of an insn.  Currently, this function is
   used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel
   PARALLEL rtx, RETURN_PC is true if OPERANDS contains a return insn.
   WRITE_BACK_P is true if OPERANDS contains an insn which explicitly
   updates the base register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is SP
     and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
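/* As an illustration (example only): "pop {r0-r3, pc}" is SP-based
   with write back, so it is treated as POP and PC is allowed in the
   2-byte encoding; "ldm r8!, {r0-r3}" has a high base register, so the
   base-register check above forces the 4-byte encoding.  */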
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}
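/* Worked example (illustration only): for the constant 0.125 the exact
   inverse is 8.0, which truncates exactly to the integer 8 = 2^3, so
   the function returns 3, the fraction-bit count usable by a
   fixed-point vcvt pattern.  For 0.3 there is no exact inverse and 0 is
   returned.  */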
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
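/* Worked example (illustration only): 65536.0 is non-negative, finite
   and integral, and real_to_integer gives 65536 = 2^16, so the
   function returns 16; 3.0 fails the exact_log2 test and yields -1.  */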
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  rtx_insn *jump = emit_jump_insn (insn);
  add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
}
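/* Note (illustration only): REG_BR_PROB_BASE is 10000, so very_unlikely
   evaluates to 99, i.e. the jump is annotated with a taken probability
   just under 1%.  */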
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    {
      switch (mode)
	{
	case QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
	case HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
	case SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
	case DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
	case HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
	case SImode: gen = gen_atomic_compare_and_swap32si_1; break;
	case DImode: gen = gen_atomic_compare_and_swap32di_1; break;
	default:
	  gcc_unreachable ();
	}
    }

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (ie. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_acquire (mod_s));

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
						    label2, cond));
      else
	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
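/* As an illustration (example only; register numbers are assumed):
   on a 32-bit target a strong SImode compare-and-swap splits into the
   familiar exclusive-monitor loop, roughly

	.L1:	ldrex	r0, [r2]
		cmp	r0, r3
		bne	.L2
		strex	r1, r4, [r2]
		cmp	r1, #0
		bne	.L1
	.L2:

   with Z on exit signaling success, as described above.  */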
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_acquire (model));

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V8HFmode:  gen = gen_neon_vuzpv8hf_internal;  break;
    case V4HFmode:  gen = gen_neon_vuzpv4hf_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V8HFmode:  gen = gen_neon_vzipv8hf_internal;  break;
    case V4HFmode:  gen = gen_neon_vzipv4hf_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	case V8HFmode:  gen = gen_neon_vrev64v8hf;  break;
	case V4HFmode:  gen = gen_neon_vrev64v4hf;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V8HFmode:  gen = gen_neon_vtrnv8hf_internal;  break;
    case V4HFmode:  gen = gen_neon_vtrnv4hf_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V4HFmode:  gen = gen_neon_vextv4hf;  break;
    case V8HFmode:  gen = gen_neon_vextv8hf;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
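/* Worked example (illustration only): the two-operand V4SImode
   selector {1, 2, 3, 4} increases by one starting from index 1, so
   LOCATION is 1 and the whole permutation is emitted as a single
   vext instruction with immediate #1.  */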
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      break;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      break;

    default:
      return false;
    }

  return true;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
#define SUB_32(DEST,SRC) \
	  gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
#define RSB_32(DEST,SRC) \
	  gen_subsi3 ((DEST), GEN_INT (32), (SRC))
#define SUB_S_32(DEST,SRC) \
	  gen_addsi3_compare0 ((DEST), (SRC), \
			       GEN_INT (-32))
#define SET(DEST,SRC) \
	  gen_rtx_SET ((DEST), (SRC))
#define SHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
#define LSHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			  SImode, (SRC), (AMOUNT))
#define REV_LSHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			  SImode, (SRC), (AMOUNT))
#define ORR(A,B) \
	  gen_rtx_IOR (SImode, (A), (B))
#define BRANCH(COND,LABEL) \
	  gen_arm_cond_branch ((LABEL), \
			       gen_rtx_ ## COND (CCmode, cc_reg, \
						 const0_rtx), \
			       cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 arithmetic-left:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

#undef SUB_32
#undef RSB_32
#undef SUB_S_32
#undef SET
#undef SHIFT
#undef LSHIFT
#undef REV_LSHIFT
#undef ORR
#undef BRANCH
}
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
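/* As an illustration (example only; "foo" is a made-up symbol): the
   RTL (const (plus (symbol_ref "foo") (const_int 32767))) is accepted,
   while an addend of 32768 falls outside the signed 16-bit range
   checked above and is rejected.  */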
/* Return true if this is a valid comparison operation, and put the
   operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case SFmode:
    case DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
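/* Worked example (illustration only): for a word-aligned memset of 15
   bytes without strd, the store count is (15 >> 2) + leftover[15 & 3]
   = 3 + 2 = 5 stores plus the cost of loading the constant; when
   unaligned access is available the trailing STRH/STRB pair is combined
   into one STR, saving one instruction.  */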
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;
  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */

static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
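/* Worked example for the aligned expansion (illustrative, not
   verified output): with LENGTH == 25, unaligned access available and
   a little-endian target, V16QImode is chosen; the first vst1.8
   covers bytes [0, 15], and since 9 bytes (in the (8, 16) range)
   remain, DST is advanced by 25 - 16 == 9 so the second vst1.8 covers
   bytes [9, 24], overlapping bytes [9, 15].  */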
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on the processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */

static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
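/* Hypothetical trace of the expansion above: LENGTH == 7,
   VALUE == 0xAB and ALIGN == 2 give v == 0xABAB, so the loop emits
   strh stores at offsets 0, 2 and 4, and the leftover code finishes
   with a single strb at offset 6.  */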
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */

static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
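/* Hypothetical trace, assuming TARGET_LDRD and a tuning that prefers
   ldrd/strd: LENGTH == 15, VALUE == 0xAB and ALIGN == 4 emit one strd
   for bytes [0, 7] and one str for bytes [8, 11]; because
   (12 + 3) == 15 and unaligned access is available, the trailing
   strh/strb pair is merged into one unaligned str covering bytes
   [11, 14], which re-stores byte 11.  */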
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */

static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */

bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
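/* Hypothetical dispatch summary: a memset of 15 bytes with a 4-byte
   aligned destination first tries the NEON path (only when
   TARGET_NEON and string_ops_prefer_neon hold), and otherwise falls
   through to arm_block_set_aligned_non_vect as sketched above.  */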
/* Implement the TARGET_SCHED_MACRO_FUSION_P hook.  Return true if the
   current tuning enables any kind of instruction fusion.  */

static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
       movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt)  == (set (zero_extract (reg r0)
					(const_int 16)
					(const_int 16))
			  (const_int imm16_1))
     or
     prev (movw)  == (set (reg r1)
			  (high (symbol_ref ("SYM"))))
     curr (movt)  == (set (reg r0)
			  (lo_sum (reg r1)
				  (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
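/* The fusible pair matched above corresponds to assembly such as
   (illustrative):
       movw    r0, #:lower16:SYM
       movt    r0, #:upper16:SYM
   Keeping the two halves back to back lets cores that implement this
   fusion materialize the full 32-bit constant in one go.  */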
/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  Return true if
   the back-to-back instructions PREV and CURR should be kept together
   during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */

bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
	return false;
      default:
	return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
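/* For reference, this is the standard AddressSanitizer 8-to-1 shadow
   mapping with the offset above (a sketch of the generic formula, not
   code used by the compiler itself):

     shadow_addr = (addr >> 3) + (1 << 29);   i.e. + 0x20000000  */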
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */

bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}

/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (!new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected arch %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
			  : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
			  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
			      false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
			      false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g. using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      while (ISSPACE (*q)) ++q;

      argstr = NULL;
      if (!strncmp (q, "thumb", 5))
	opts->x_target_flags |= MASK_THUMB;

      else if (!strncmp (q, "arm", 3))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strncmp (q, "fpu=", 4))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for attribute(target(\"%s\"))", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else
	{
	  error ("attribute(target(\"%s\")) is unknown", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
/* Add the "target" attribute string MODE to the attribute list
   ATTRIBUTES.  */

static void
add_attribute (const char *mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternately on functions.  */

static void
arm_insert_attributes (tree fndecl, tree *attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */

static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Output assembler directives introducing function NAME with
   declaration DECL: unified syntax, the appropriate instruction set
   state (.code/.thumb/.arm) and the FPU in use.  */

void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
	       (TARGET_SOFT_FLOAT
		? "softvfp"
		: arm_identify_fpu_from_isa (arm_active_target.isa)));

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
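/* Illustrative inputs for the routine above: (mem (plus (reg r1)
   (const_int 8))) yields BASE == r1 and OFFSET == (const_int 8);
   (mem (reg r1)) yields BASE == r1 and OFFSET == const0_rtx; any
   other address shape fails with both outputs cleared.  */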
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for these instructions.  For
   other instructions, FUSION_PRI and PRI are simply set to MAX_PRI.
   In the future, other kinds of instruction fusion can be supported by
   returning different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int) INTVAL (offset);
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3          3   2   1   0
              | x | x | x | x |      | x | x | x | x |
Architecture    3   2   1   0          3   2   1   0

Low Mask:         { 2, 3 }              { 0, 1 }
High Mask:        { 0, 1 }              { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
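/* Worked example (illustrative): for V4SImode with HIGH == true, a
   little-endian target gets (parallel [(const_int 2) (const_int 3)])
   while a big-endian target gets (parallel [(const_int 0)
   (const_int 1)]), matching the mask table in the comment above.  */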
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */

static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;
      if (flags & SECTION_WRITE)
	*num |= 0x1;
      if (flags & SECTION_CODE)
	*num |= 0x4;
      if (flags & SECTION_MERGE)
	*num |= 0x10;
      if (flags & SECTION_STRINGS)
	*num |= 0x20;
      if (flags & SECTION_TLS)
	*num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;

      return true;
    }

  return false;
}
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
						       exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implement the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  machine_mode libval_mode
    = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode), MODE_INT);

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode, 2,
					op0, GET_MODE (op0),
					op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
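/* Illustrative result layout: for SImode the libcall returns a DImode
   value whose subreg at byte offset 0 is the quotient and whose subreg
   at byte offset GET_MODE_SIZE (SImode) == 4 is the remainder, which
   is exactly how the two simplify_gen_subreg calls above split it.  */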
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented, it aborts via gcc_unreachable.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
	if (arm_arch4)
	  return true;
	break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch5)
	  return true;
	break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch6 || arm_arch5te)
	  return true;
	break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
	if (arm_arch6)
	  return true;
	break;
      default:
	gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
      case PLUS:
	{
	  /* Or registers with an offset.  */
	  if (!REG_P (XEXP (op, 0)))
	    return false;

	  op = XEXP (op, 1);

	  /* The offset must be an immediate though.  */
	  if (!CONST_INT_P (op))
	    return false;

	  range = INTVAL (op);

	  /* Within the range of [-1020,1020].  */
	  if (!IN_RANGE (range, -1020, 1020))
	    return false;

	  /* And a multiple of 4.  */
	  return (range % 4) == 0;
	}
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
	return REG_P (XEXP (op, 0));
      default:
	gcc_unreachable ();
    }
  return false;
}
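/* Illustrative operands for the predicate above: [r0], [r0, #1020]
   and [r0, #-1020] are accepted; [r0, #1024] is rejected (out of
   range) and [r0, #2] is rejected (not a multiple of 4).  */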
#include "gt-arm.h"