/* Output routines for GCC for ARM.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
                             unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
                               machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree, machine_mode,
                                               int *, const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *,
                                struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
                                        unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
                                                     const_tree type,
                                                     int misalign,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
                                             const unsigned char *sel);
static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                           tree vectype,
                                           int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
                                   enum vect_cost_for_stmt kind,
                                   struct _stmt_vec_info *stmt_info,
                                   int misalign,
                                   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                                         bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
static void arm_sched_fusion_priority (rtx_insn *, int, int *, int *);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                     HOST_WIDE_INT, const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
                                                int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx,
                                       rtx *, rtx *);
static machine_mode arm_floatn_mode (int, bool);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_entry, false },
  { "cmse_nonsecure_call", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_call, true },
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef  TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef  TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef  TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef  TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef  TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef  TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef  TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef  TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
#endif /* ARM_UNWIND_INFO */
#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block for a
   particular anchor is 4095 - (-4088) + 1 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef  TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef  TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef  TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef  TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef  TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef  TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef  MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
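/* arm_restrict_it (controlled by -mrestrict-it) limits an IT block to a
   single conditional instruction, matching ARMv8's deprecation of the more
   general IT forms; otherwise up to four instructions may share one IT
   block.  */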
#undef  TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef  TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef  TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef  TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef  TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef  TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef  TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef  TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef  TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision
struct gcc_target targetm = TARGET_INITIALIZER;
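/* targetm collects every TARGET_* hook defined above: target-def.h expands
   TARGET_INITIALIZER using the final value of each macro, which is why all
   of the #undef/#define pairs must precede this definition.  */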
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
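/* This default is retuned per core from tune_params (the "Max cond insns"
   entries below); see e.g. arm_strongarm_tune, which lowers the limit to 3
   because StrongARM executes branches early.  */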
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;
/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */
struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the ARM 7ve extensions.  */
int arm_arch7ve = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;
/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
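/* The table is laid out so that each even/odd pair of codes are inverses
   of one another (eq/ne, cs/cc, mi/pl, ...), so the inverse of condition
   code N is simply N ^ 1.  */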
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
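/* In other words: the low registers r0-r7, minus the Thumb frame pointer
   and, when it is assigned a low register, the PIC register; the SP and PC
   bits fall outside 0xff and are masked out regardless.  */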
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  unsigned int tune_flags;
  enum base_architecture base_arch;
  enum isa_feature isa_bits[isa_num_bits];
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {				\
    num_slots,			\
    l1_size,			\
    l1_line_size		\
  }
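/* For illustration only: ARM_PREFETCH_BENEFICIAL (4, 32, 32) would expand
   to { 4, 32, 32 }, i.e. four prefetch slots, a 32k L1 cache and 32-byte
   cache lines (assumed units); cores for which prefetching does not help
   use ARM_PREFETCH_NOT_BENEFICIAL, as in the tune_params tables below.  */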
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  1, /* vec_unalign_load_cost.  */
  1, /* vec_unalign_store_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
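/* Note that with every cost above set to 1 the generic model effectively
   just counts statements; only a taken conditional branch (cost 3) is
   treated as more expensive.  */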
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
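/* COSTS_N_INSNS (N) expresses a cost of N instructions in the units used by
   the RTL cost hooks (4 cost units per instruction), so the tables below and
   arm_rtx_costs speak the same scale.  */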
const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (2), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (2), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    COSTS_N_INSNS (1), /* bfx.  */
    true               /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3),  /* simple.  */
      COSTS_N_INSNS (3),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (3),  /* add.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
      COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (4),  /* extend.  */
      COSTS_N_INSNS (4),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2), /* load.  */
    COSTS_N_INSNS (2), /* load_sign_extend.  */
    COSTS_N_INSNS (2), /* ldrd.  */
    COSTS_N_INSNS (2), /* ldm_1st.  */
    1,                 /* ldm_regs_per_insn_1st.  */
    2,                 /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5), /* loadf.  */
    COSTS_N_INSNS (5), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (2), /* store.  */
    COSTS_N_INSNS (2), /* strd.  */
    COSTS_N_INSNS (2), /* stm_1st.  */
    1,                 /* stm_regs_per_insn_1st.  */
    2,                 /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1), /* storef.  */
    COSTS_N_INSNS (1), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1)  /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (1),  /* fpconst.  */
      COSTS_N_INSNS (1),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24), /* div.  */
      COSTS_N_INSNS (5),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (1),  /* fpconst.  */
      COSTS_N_INSNS (1),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu.  */
  }
};
const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1), /* shift.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    0,                 /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    0,                 /* log_shift_reg.  */
    0,                 /* extend_arith.  */
    true               /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* simple.  */
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (1),  /* extend_add.  */
      COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1), /* load.  */
    COSTS_N_INSNS (1), /* load_sign_extend.  */
    COSTS_N_INSNS (1), /* ldrd.  */
    COSTS_N_INSNS (1), /* ldm_1st.  */
    1,                 /* ldm_regs_per_insn_1st.  */
    2,                 /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1), /* loadf.  */
    COSTS_N_INSNS (1), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (1), /* store.  */
    COSTS_N_INSNS (1), /* strd.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1,                 /* stm_regs_per_insn_1st.  */
    2,                 /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1), /* storef.  */
    COSTS_N_INSNS (1), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1)  /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36), /* div.  */
      COSTS_N_INSNS (11), /* mult.  */
      COSTS_N_INSNS (20), /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (9),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (6),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (8),  /* toint.  */
      COSTS_N_INSNS (8),  /* fromint.  */
      COSTS_N_INSNS (8)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64), /* div.  */
      COSTS_N_INSNS (16), /* mult.  */
      COSTS_N_INSNS (25), /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (9),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (6),  /* compare.  */
      COSTS_N_INSNS (6),  /* widen.  */
      COSTS_N_INSNS (6),  /* narrow.  */
      COSTS_N_INSNS (8),  /* toint.  */
      COSTS_N_INSNS (8),  /* fromint.  */
      COSTS_N_INSNS (8)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu.  */
  }
};
const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1), /* shift.  */
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    COSTS_N_INSNS (1), /* bfx.  */
    COSTS_N_INSNS (1), /* clz.  */
    COSTS_N_INSNS (1), /* rev.  */
    true               /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (1),  /* extend_add.  */
      COSTS_N_INSNS (7)   /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1), /* load.  */
    COSTS_N_INSNS (1), /* load_sign_extend.  */
    COSTS_N_INSNS (6), /* ldrd.  */
    COSTS_N_INSNS (1), /* ldm_1st.  */
    1,                 /* ldm_regs_per_insn_1st.  */
    2,                 /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* loadf.  */
    COSTS_N_INSNS (4), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (1), /* store.  */
    COSTS_N_INSNS (3), /* strd.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1,                 /* stm_regs_per_insn_1st.  */
    2,                 /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (2), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1)  /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15), /* div.  */
      COSTS_N_INSNS (3),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30), /* div.  */
      COSTS_N_INSNS (6),  /* mult.  */
      COSTS_N_INSNS (10), /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu.  */
  }
};
const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1), /* shift.  */
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    COSTS_N_INSNS (1), /* bfx.  */
    COSTS_N_INSNS (1), /* clz.  */
    COSTS_N_INSNS (1), /* rev.  */
    true               /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (1),  /* extend_add.  */
      COSTS_N_INSNS (7)   /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1), /* load.  */
    COSTS_N_INSNS (1), /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (1), /* ldm_1st.  */
    1,                 /* ldm_regs_per_insn_1st.  */
    2,                 /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* loadf.  */
    COSTS_N_INSNS (2), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (1), /* store.  */
    COSTS_N_INSNS (3), /* strd.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1,                 /* stm_regs_per_insn_1st.  */
    2,                 /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (2), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1)  /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15), /* div.  */
      COSTS_N_INSNS (3),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30), /* div.  */
      COSTS_N_INSNS (6),  /* mult.  */
      COSTS_N_INSNS (10), /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu.  */
  }
};
const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1), /* shift_reg.  */
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfx.  */
    COSTS_N_INSNS (1), /* clz.  */
    COSTS_N_INSNS (1), /* rev.  */
    true               /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),  /* simple.  */
      COSTS_N_INSNS (3),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (3),  /* add.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
      COSTS_N_INSNS (18)  /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (3),  /* extend.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3), /* load.  */
    COSTS_N_INSNS (3), /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (3), /* ldm_1st.  */
    1,                 /* ldm_regs_per_insn_1st.  */
    2,                 /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3), /* loadf.  */
    COSTS_N_INSNS (3), /* loadd.  */
    0,                 /* load_unaligned.  */
    1,                 /* stm_regs_per_insn_1st.  */
    2,                 /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (2), /* stored.  */
    0,                 /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1)  /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (2),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (2),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu.  */
  }
};
const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1), /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* bfi.  */
    true               /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),  /* simple.  */
      COSTS_N_INSNS (3),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (2),  /* add.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
      COSTS_N_INSNS (18)  /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (3),  /* extend.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3), /* load.  */
    COSTS_N_INSNS (3), /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (4), /* ldm_1st.  */
    1,                 /* ldm_regs_per_insn_1st.  */
    2,                 /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4), /* loadf.  */
    COSTS_N_INSNS (4), /* loadd.  */
    0,                 /* load_unaligned.  */
    COSTS_N_INSNS (1), /* stm_1st.  */
    1,                 /* stm_regs_per_insn_1st.  */
    2,                 /* stm_regs_per_insn_subsequent.  */
    0,                 /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1)  /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (5),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (2),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu.  */
  }
};
const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,                 /* arith_shift.  */
    COSTS_N_INSNS (1), /* arith_shift_reg.  */
    COSTS_N_INSNS (1), /* log_shift_reg.  */
    COSTS_N_INSNS (1), /* extend_arith.  */
    COSTS_N_INSNS (1), /* non_exec.  */
    false              /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* simple.  */
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
      COSTS_N_INSNS (8)   /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2), /* load.  */
    0,                 /* load_sign_extend.  */
    COSTS_N_INSNS (3), /* ldrd.  */
    COSTS_N_INSNS (2), /* ldm_1st.  */
    1,                 /* ldm_regs_per_insn_1st.  */
    1,                 /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* loadf.  */
    COSTS_N_INSNS (3), /* loadd.  */
    COSTS_N_INSNS (1), /* load_unaligned.  */
    COSTS_N_INSNS (2), /* store.  */
    COSTS_N_INSNS (3), /* strd.  */
    COSTS_N_INSNS (2), /* stm_1st.  */
    1,                 /* stm_regs_per_insn_1st.  */
    1,                 /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2), /* storef.  */
    COSTS_N_INSNS (3), /* stored.  */
    COSTS_N_INSNS (1), /* store_unaligned.  */
    COSTS_N_INSNS (1), /* loadv.  */
    COSTS_N_INSNS (1)  /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),  /* div.  */
      COSTS_N_INSNS (2),  /* mult.  */
      COSTS_N_INSNS (5),  /* mult_addsub.  */
      COSTS_N_INSNS (3),  /* fma.  */
      COSTS_N_INSNS (1),  /* addsub.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15), /* div.  */
      COSTS_N_INSNS (5),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,        /* Insn extra costs.  */
  NULL,                        /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,                           /* Constant limit.  */
  5,                           /* Max cond insns.  */
  8,                           /* Memset max inline.  */
  1,                           /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,        /* Insn extra costs.  */
  NULL,                        /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                           /* Constant limit.  */
  5,                           /* Max cond insns.  */
  8,                           /* Memset max inline.  */
  1,                           /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,        /* Insn extra costs.  */
  NULL,                        /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                           /* Constant limit.  */
  3,                           /* Max cond insns.  */
  8,                           /* Memset max inline.  */
  1,                           /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,        /* Insn extra costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,                           /* Constant limit.  */
  3,                           /* Max cond insns.  */
  8,                           /* Memset max inline.  */
  1,                           /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,        /* Insn extra costs.  */
  NULL,                        /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                           /* Constant limit.  */
  5,                           /* Max cond insns.  */
  8,                           /* Memset max inline.  */
  1,                           /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,        /* Insn extra costs.  */
  NULL,                        /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                           /* Constant limit.  */
  5,                           /* Max cond insns.  */
  8,                           /* Memset max inline.  */
  2,                           /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_qdf24xx_tune =
{
  &qdf24xx_extra_costs,
  NULL,					/* Scheduler cost adjustment.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL (0, -1, 64),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
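
/* A note on the prefetch arguments above, for illustration: the
   ARM_PREFETCH_BENEFICIAL macro is assumed here to take
   (num_slots, l1_cache_size, l1_cache_line_size), matching the order
   in which arm_option_override later reads the prefetch fields back
   out of current_tune.  Under that assumption the qdf24xx entry
   records no researched slot count (0), an unknown L1 cache size (-1)
   and a 64-byte L1 cache line, so only the cache-line parameter is
   propagated to the prefetching pass.  */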
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL (4, 32, 32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, X, IDENT, TUNE_FLAGS, ARCH, ISA, COSTS) \
  {NAME, TARGET_CPU_##IDENT, TUNE_FLAGS, #ARCH, BASE_ARCH_##ARCH, \
   {ISA isa_nobit}, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, TARGET_CPU_arm_none, 0, NULL, BASE_ARCH_0, {isa_nobit}, NULL}
};
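
/* For illustration, a hypothetical arm-cores.def entry such as

     ARM_CORE ("example-core", examplecore, examplecore, 0, 7A,
	       ISA_ARMv7a, cortex)

   would expand, via the ARM_CORE macro above, to the initializer

     {"example-core", TARGET_CPU_examplecore, 0, "7A", BASE_ARCH_7A,
      {ISA_ARMv7a isa_nobit}, &arm_cortex_tune},

   i.e. the COSTS argument selects one of the tune_params structures
   defined earlier in this file.  (These names are made up; the real
   entries live in arm-cores.def.)  */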
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */

#define ARM_ARCH(NAME, CORE, TUNE_FLAGS, ARCH, ISA) \
  {NAME, TARGET_CPU_##CORE, TUNE_FLAGS, #ARCH, BASE_ARCH_##ARCH, \
   {ISA isa_nobit}, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, TARGET_CPU_arm_none, 0, NULL, BASE_ARCH_0, {isa_nobit}, NULL}
};
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (e.g. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Available values for -mfpu=.  */

const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, CNAME, ISA) \
  { NAME, {ISA isa_nobit} },
#include "arm-fpus.def"
#undef ARM_FPU
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
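
/* For example, bit_count (0x2C) == 3: each iteration of the loop above
   clears the lowest set bit (Kernighan's method), so the loop runs once
   per set bit rather than once per bit position:

     0x2C (101100) -> 0x28 (101000) -> 0x20 (100000) -> 0   (3 steps).  */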
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
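
/* For example (the name is derived mechanically from the sprintf
   format above; the "sa" suffix comes from the mode tables set up
   later in arm_init_libfuncs):

     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);

   registers the libcall name "__gnu_ssaddsa3" for saturating SAmode
   addition.  */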
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling "
	     "for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when "
	     "compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* We only support pure-code on Thumb-2 M-profile targets.  */
  if (target_pure_code
      && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
    error ("-mpure-code only supports non-pic code on armv7-m targets");
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = arm_restrict_it ? 1 : 4;
    }
  else
    /* When -mrestrict-it is in use tone down the if-conversion.  */
    max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
      ? 1 : current_tune->max_insns_skipped;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_align_functions <= 0)
    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? 2 : 4;
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}
static void
arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with, e.g.,
	 -march=armv3.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6
				  && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
/* Convert a static initializer array of feature bits to sbitmap
   representation.  */
static void
arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
{
  bitmap_clear (isa);
  while (*isa_bits != isa_nobit)
    bitmap_set_bit (isa, *(isa_bits++));
}

static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;
/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const struct processors *arm_selected_tune = NULL;
  const struct processors *arm_selected_arch = NULL;
  const struct processors *arm_selected_cpu = NULL;
  const struct arm_fpu_desc *arm_selected_fpu = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_option)
    arm_selected_arch = &all_architectures[opts->x_arm_arch_option];

  if (opts_set->x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
      arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
    }

  if (opts_set->x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
	  bitmap_xor (cpu_isa, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);

	  if (!bitmap_empty_p (cpu_isa))
	    {
	      if (warn_compatible)
		warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
			 arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = NULL;
	    }

	  target->core_name = arm_selected_cpu->name;
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = arm_selected_arch;
	  target->arch_name = arm_selected_arch->name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->name;
      arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
    }
  /* If the user did not specify a processor, choose one for them.  */
  else
    {
      const struct processors * sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      gcc_assert (arm_selected_cpu->name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->isa_bits);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  bitmap_set_bit (sought_isa, isa_bit_thumb);
	  bitmap_set_bit (sought_isa, isa_bit_mode32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we forcibly remove MODE26 from
	     the isa features here (if it was set), so that the
	     search below will always be able to find a compatible
	     processor.  */
	  bitmap_clear_bit (default_isa, isa_bit_mode26);
	}

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->name;
      arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
    }

  gcc_assert (arm_selected_cpu);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }
  else if (target->core_name == NULL)
    /* To support this we need to be able to parse FPU feature options
       from the architecture string.  */
    sorry ("-mfpu=auto not currently supported without an explicit CPU.");

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_cpu->arch;
  target->base_arch = arm_selected_cpu->base_arch;
  target->arch_core = arm_selected_cpu->core;

  target->tune_flags = arm_selected_tune->tune_flags;
  target->tune = arm_selected_tune->tune;
  target->tune_core = arm_selected_tune->core;
}
3286 /* Fix up any incompatible options that the user has specified. */
3288 arm_option_override (void)
3290 static const enum isa_feature fpu_bitlist
[] = { ISA_ALL_FPU
, isa_nobit
};
3291 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3293 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3294 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3296 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3297 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3299 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3301 if (!global_options_set
.x_arm_fpu_index
)
3303 const char *target_fpu_name
;
3307 #ifdef FPUTYPE_DEFAULT
3308 target_fpu_name
= FPUTYPE_DEFAULT
;
3310 target_fpu_name
= "vfp";
3313 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &fpu_index
,
3316 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3319 /* Create the default target_options structure. We need this early
3320 to configure the overall build target. */
3321 target_option_default_node
= target_option_current_node
3322 = build_target_option_node (&global_options
);
3324 arm_configure_build_target (&arm_active_target
,
3325 TREE_TARGET_OPTION (target_option_default_node
),
3326 &global_options_set
, true);
3328 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3329 SUBTARGET_OVERRIDE_OPTIONS
;
3332 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3333 arm_base_arch
= arm_active_target
.base_arch
;
3335 arm_tune
= arm_active_target
.tune_core
;
3336 tune_flags
= arm_active_target
.tune_flags
;
3337 current_tune
= arm_active_target
.tune
;
3339 /* TBD: Dwarf info for apcs frame is not handled yet. */
3340 if (TARGET_APCS_FRAME
)
3341 flag_shrink_wrap
= false;
3343 /* BPABI targets use linker tricks to allow interworking on cores
3344 without thumb support. */
3345 if (TARGET_INTERWORK
3347 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3349 warning (0, "target CPU does not support interworking" );
3350 target_flags
&= ~MASK_INTERWORK
;
3353 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3355 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3356 target_flags
|= MASK_APCS_FRAME
;
3359 if (TARGET_POKE_FUNCTION_NAME
)
3360 target_flags
|= MASK_APCS_FRAME
;
3362 if (TARGET_APCS_REENT
&& flag_pic
)
3363 error ("-fpic and -mapcs-reent are incompatible");
3365 if (TARGET_APCS_REENT
)
3366 warning (0, "APCS reentrant code not supported. Ignored");
3368 /* Initialize boolean versions of the architectural flags, for use
3369 in the arm.md file. */
3370 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv3m
);
3371 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv4
);
3372 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3373 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5
);
3374 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5e
);
3375 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6
);
3376 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6k
);
3377 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3378 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3379 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7
);
3380 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7em
);
3381 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8
);
3382 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_1
);
3383 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_2
);
3384 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3385 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3386 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3387 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3388 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3389 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3390 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3391 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3392 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3393 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3395 = (arm_arch6k
&& arm_arch7
&& arm_arch_thumb_hwdiv
&& arm_arch_arm_hwdiv
);
3398 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3399 error ("selected fp16 options are incompatible.");
3400 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3404 /* Set up some tuning parameters. */
3405 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3406 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3407 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3408 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3409 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3410 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3412 /* And finally, set up some quirks. */
3413 arm_arch_no_volatile_ce
3414 = bitmap_bit_p (arm_active_target
.isa
, isa_quirk_no_volatile_ce
);
3416 = arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
, isa_quirk_ARMv6kz
);
3418 /* V5 code we generate is completely interworking capable, so we turn off
3419 TARGET_INTERWORK here to avoid many tests later on. */
3421 /* XXX However, we must pass the right pre-processor defines to CPP
3422 or GLD can get confused. This is a hack. */
3423 if (TARGET_INTERWORK
)
3424 arm_cpp_interwork
= 1;
3427 target_flags
&= ~MASK_INTERWORK
;
3429 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3430 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3432 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3433 error ("iwmmxt abi requires an iwmmxt capable cpu");
3435 /* If soft-float is specified then don't use FPU. */
3436 if (TARGET_SOFT_FLOAT
)
3437 arm_fpu_attr
= FPU_NONE
;
3439 arm_fpu_attr
= FPU_VFP
;
3441 if (TARGET_AAPCS_BASED
)
3443 if (TARGET_CALLER_INTERWORKING
)
3444 error ("AAPCS does not support -mcaller-super-interworking");
3446 if (TARGET_CALLEE_INTERWORKING
)
3447 error ("AAPCS does not support -mcallee-super-interworking");
3450 /* __fp16 support currently assumes the core has ldrh. */
3451 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3452 sorry ("__fp16 and no ldrh");
3454 if (TARGET_AAPCS_BASED
)
3456 if (arm_abi
== ARM_ABI_IWMMXT
)
3457 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3458 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3459 && TARGET_HARD_FLOAT
)
3461 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3462 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_VFPv2
))
3463 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3466 arm_pcs_default
= ARM_PCS_AAPCS
;
3470 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3471 sorry ("-mfloat-abi=hard and VFP");
3473 if (arm_abi
== ARM_ABI_APCS
)
3474 arm_pcs_default
= ARM_PCS_APCS
;
3476 arm_pcs_default
= ARM_PCS_ATPCS
;
3479 /* For arm2/3 there is no need to do any scheduling if we are doing
3480 software floating-point. */
3481 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3482 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3484 /* Use the cp15 method if it is available. */
3485 if (target_thread_pointer
== TP_AUTO
)
3487 if (arm_arch6k
&& !TARGET_THUMB1
)
3488 target_thread_pointer
= TP_CP15
;
3490 target_thread_pointer
= TP_SOFT
;
3493 /* Override the default structure alignment for AAPCS ABI. */
3494 if (!global_options_set
.x_arm_structure_size_boundary
)
3496 if (TARGET_AAPCS_BASED
)
3497 arm_structure_size_boundary
= 8;
3501 if (arm_structure_size_boundary
!= 8
3502 && arm_structure_size_boundary
!= 32
3503 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3505 if (ARM_DOUBLEWORD_ALIGN
)
3507 "structure size boundary can only be set to 8, 32 or 64");
3509 warning (0, "structure size boundary can only be set to 8 or 32");
3510 arm_structure_size_boundary
3511 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  /* Hoisting PIC address calculations more aggressively provides a small,
     but measurable, size reduction for PIC code.  Therefore, we decrease
     the bar for unrestricted expression hoisting to the cost of PIC address
     calculation, which is 2 instructions.  */
  maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bit operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;
  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;
  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");
  /* Disable scheduling fusion by default if this is not an ARMv7
     processor, or if it doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();
  /* Resynchronize the saved target options.  */
  cl_target_option_save (TREE_TARGET_OPTION (target_option_default_node),
			 &global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg *ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
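
/* Editorial example (not part of the original source): the attribute
   table above is what maps the following declarations to ARM_FT_ISR
   and ARM_FT_FIQ respectively; with no argument, "interrupt" defaults
   to IRQ handling:

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));  */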
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
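
/* Editorial sketch (not part of the original source): after the calls
   above, a 32-bit trampoline is laid out as

     offset  0:  first template instruction word
     offset  4:  second template instruction word
     offset  8:  static chain value
     offset 12:  target function address

   which matches the adjust_address offsets (8 and 12) used above; the
   __clear_cache call then flushes exactly TRAMPOLINE_SIZE bytes.  */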
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry functions need to use bxns to return and thus
     need several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
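
/* Editorial example: when this returns 1 the whole epilogue folds into
   one instruction, e.g. a pop that loads directly into the program
   counter:

	ldmfd	sp!, {r4, r5, pc}

   Any of the conditions above instead forces a multi-insn epilogue.  */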
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */

bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
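
/* Editorial worked examples: an ARM-mode immediate is an 8-bit value
   rotated right by an even amount, so

     const_ok_for_arm (0x000000ff)   -> true   (no rotation needed)
     const_ok_for_arm (0x0000ff00)   -> true   (0xff << 8)
     const_ok_for_arm (0xff000000)   -> true   (0xff << 24)
     const_ok_for_arm (0x00000fff)   -> false  (12 significant bits)

   while Thumb-2 additionally accepts the replicated forms checked
   above, e.g. 0x00ff00ff and 0x12121212.  */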
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	     && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    for (i = 0; i < 32; i += 2)
      {
	int consecutive_zeros = 0;

	if (!(val & (3 << i)))
	  {
	    while ((i < 32) && !(val & (3 << i)))
	      {
		consecutive_zeros += 2;
		i += 2;
	      }
	    if (consecutive_zeros > best_consecutive_zeros)
	      {
		best_consecutive_zeros = consecutive_zeros;
		best_start = i - consecutive_zeros;
	      }
	    i -= 2;
	  }
      }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
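
/* Editorial worked example: for val = 0x89abcdef no single immediate
   matches, so the sequence built here is four rotated 8-bit chunks:

	mov	rD, #0x89000000
	orr	rD, rD, #0x00ab0000
	orr	rD, rD, #0x0000cd00
	orr	rD, rD, #0x000000ef

   (rD is only an illustrative register name.)  */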
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 not be any better.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  /* The 8-bit immediate already found clears b1 (and maybe b2),
	     but must leave b3 and b4 alone.  */

	  /* First try to find a 32-bit replicated constant that clears
	     almost everything.  We can assume that we can't do it in one,
	     or else we wouldn't be here.  */
	  unsigned int tmp = b1 & b2 & b3 & b4;
	  unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
			      + (tmp << 24);
	  unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					+ (tmp == b3) + (tmp == b4);
	  if (tmp
	      && (matching_bytes >= 3
		  || (matching_bytes == 2
		      && const_ok_for_op (remainder & ~tmp2, code))))
	    /* At least 3 of the bytes match, and the fourth has at
	       least as many bits set, or two of the bytes match
	       and it will only require one more insn to finish.  */
	    result = tmp2;

	  /* Second, try to find a 16-bit replicated constant that can
	     leave three of the bytes clear.  If b2 or b4 is already
	     zero, then we can.  If the 8-bit from above would not
	     clear b2 anyway, then we still win.  */
	  else if (b1 == b3 && (!b2 || !b4
				|| (remainder & 0x00ff0000 & ~result)))
	    result = remainder & 0xff00ff00;

	  /* The 8-bit immediate already found clears b2 (and maybe b3)
	     and we don't get here unless b1 is already clear, but it will
	     leave b4 unchanged.  */

	  /* If we can clear b2 and b4 at once, then we win, since the
	     8-bits couldn't possibly reach that far.  */
	  else if (b2 == b4)
	    result = remainder & 0x00ff00ff;
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
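
/* Editorial note on the Thumb-2 replicated forms tried above: the
   modified-immediate encodings 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY
   each fit in a single insn, so e.g. val = 0x12121212 is one mov
   here, where the plain 8-bit path would need four rotated chunks.  */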
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;
  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }
  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}
      return 1;
    }
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}
      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}
      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR (mode,
						 gen_rtx_ASHIFT (mode, source,
								 GEN_INT (i)),
						 source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;
    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	 to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	 For eg. r0 = r0 | 0xfff
	      mvn	r0, r0, lsr #12
	      mvn	r0, r0, asl #12
	 */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;
    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    can_invert = 0;

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
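
/* Editorial example: "x > 0xfff" cannot use 0xfff directly (not a
   valid rotated 8-bit immediate), so GT is rewritten above as
   "x >= 0x1000"; 0x1000 is a single immediate, so the comparison
   stays one cmp instruction.  */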
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
)
5383 static bool init_done
= false;
5384 static libcall_table_type
*libcall_htab
= NULL
;
5390 libcall_htab
= new libcall_table_type (31);
5391 add_libcall (libcall_htab
,
5392 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5393 add_libcall (libcall_htab
,
5394 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5395 add_libcall (libcall_htab
,
5396 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5397 add_libcall (libcall_htab
,
5398 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5400 add_libcall (libcall_htab
,
5401 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5402 add_libcall (libcall_htab
,
5403 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5404 add_libcall (libcall_htab
,
5405 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5406 add_libcall (libcall_htab
,
5407 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5409 add_libcall (libcall_htab
,
5410 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5411 add_libcall (libcall_htab
,
5412 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5413 add_libcall (libcall_htab
,
5414 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5415 add_libcall (libcall_htab
,
5416 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5417 add_libcall (libcall_htab
,
5418 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5419 add_libcall (libcall_htab
,
5420 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5421 add_libcall (libcall_htab
,
5422 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5423 add_libcall (libcall_htab
,
5424 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5426 /* Values from double-precision helper functions are returned in core
5427 registers if the selected core only supports single-precision
5428 arithmetic, even if we are using the hard-float ABI. The same is
5429 true for single-precision helpers, but we will never be using the
5430 hard-float ABI on a CPU which doesn't support single-precision
5431 operations in hardware. */
5432 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5433 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5434 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5435 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5436 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5437 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5438 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5439 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5440 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5441 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5442 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5443 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5445 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5447 add_libcall (libcall_htab
,
5448 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5451 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
}

static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
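
/* Editorial examples for the APCS rules above: "struct { int x; }" is
   integer-like and comes back in r0, while "struct { float f; }" or a
   two-word "struct { int a, b; }" is returned in memory through the
   hidden result pointer.  */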
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
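
/* Usage example (editorial): the "pcs" attribute handled above is
   written on a function type, e.g.

     double f (double) __attribute__ ((pcs ("aapcs")));

   which forces base-AAPCS (core register) passing for f even when the
   target default is "aapcs-vfp".  */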
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || count < 0)
          return -1;

        count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
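
/* Illustrative examples (not from the original sources) of what the
   walk above accepts and rejects:

     struct s1 { float x, y, z, w; };    -- four SFmode elements: count 4
     struct s2 { double d[2]; };         -- two DFmode elements: count 2
     struct s3 { float f; double d; };   -- mixed base modes: returns -1

   The element mode must be uniform throughout; callers additionally
   limit the element count (at most 4 for the VFP co-processor).  */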
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       machine_mode mode, const_tree type,
                                       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
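
/* For example (illustrative): a _Complex double argument has DCmode,
   which the MODE_COMPLEX_FLOAT arm above classifies as two DFmode
   elements (*count == 2, *base_mode == DFmode); under the VFP variant
   it would therefore occupy a pair of double-precision registers.  */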
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}
static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode
            || (mode == TImode && ! TARGET_NEON)
            || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;
            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
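
/* A worked example (illustrative) of the mask arithmetic above: for a
   candidate of two DFmode elements, rmode_size is 8, so shift is 2
   (each DFmode element covers two SFmode-sized slots) and mask is
   (1 << 4) - 1 == 0xf.  The loop then scans regno = 0, 2, 4, ... for
   four consecutive free single-precision slots, i.e. an aligned pair
   of D registers such as d0/d1 (s0-s3).  */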
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
          && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }

      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)                             \
  {                                             \
    aapcs_ ## X ## _cum_init,                   \
    aapcs_ ## X ## _is_call_candidate,          \
    aapcs_ ## X ## _allocate,                   \
    aapcs_ ## X ## _is_return_candidate,        \
    aapcs_ ## X ## _allocate_return_reg,        \
    aapcs_ ## X ## _advance                     \
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
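
/* A worked example (illustrative) of the rules above for
   f (int a, double d, int b) under the base AAPCS:

     a: ncrn 0, one register  -> r0 (C4), next ncrn 1
     d: doubleword aligned, so C3 rounds ncrn up to 2 -> r2/r3 (C4)
     b: ncrn 4, no core registers left -> C6 caps NCRN at 4 and
        C7/C8 place the argument on the stack.  */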
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      return true;

  return false;
}
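
/* For instance (illustrative): double and long long have 64-bit
   alignment and need doubleword alignment here, as does a struct
   containing such a member; int, float and a struct of two ints
   do not.  */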
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
          ? DOUBLEWORD_ALIGNMENT
          : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
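
/* Illustrative use from the source-language side (not part of this
   file):

     #pragma long_calls
     void far_away (void);       -- type gets the long_call attribute
     #pragma long_calls_off

   Function types declared while LONG is in effect are given the
   long_call attribute by arm_set_default_type_attributes later in
   this file.  */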
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            warning (OPT_Wattributes, "%qE attribute ignored",
                     name);
        }
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
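
/* Illustrative use (not part of this file): the attribute names the
   calling convention for an individual function, e.g.

     double dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   Arguments other than the known PCS names map to ARM_PCS_UNKNOWN
   and are diagnosed above.  */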
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;
      machine_mode arg_mode = TYPE_MODE (arg_type);

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
        continue;

      if (!first_param)
        arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
      if (!arg_rtx
          || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
        {
          error ("%qE attribute not available to functions with arguments "
                 "passed on the stack", name);
          return true;
        }
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
             "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
             "the stack", name);
      return true;
    }
  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
                                 tree /* args */,
                                 int /* flags */,
                                 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
               name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
               "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
                                                      TREE_TYPE (fndecl));
  return NULL_TREE;
}
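
/* Illustrative use (not part of this file):

     int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);

   Compiled with -mcmse this marks an ARMv8-M secure-state entry
   function; the checks above reject any argument or return value
   that would have to travel via the (secure) stack.  */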
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
                                tree /* args */,
                                int /* flags */,
                                bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
               name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
               "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
                          TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
                         TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
                         TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
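
/* Illustrative use (not part of this file):

     void far_func (void) __attribute__ ((long_call));

   or building with -mlong-calls; either causes calls to the function
   to be made indirectly through a register, so the callee may live
   anywhere in the 32-bit address space rather than within direct
   branch range.  */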
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
        return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
        decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx_insn *insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF &&
               SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && REG_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
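
/* Some examples (illustrative) of ARM state addresses accepted above:

     (mem (reg r0))                             -- base register
     (mem (post_inc (reg r0)))                  -- auto-modify
     (mem (plus (reg r0) (const_int 1020)))     -- base + immediate
     (mem (plus (reg r0) (mult (reg r1) (const_int 4))))
                                                -- base + scaled index

   subject to the per-mode index ranges checked by
   arm_legitimate_index_p below.  */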
/* Return nonzero if X is a valid Thumb-2 address operand.  */
int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32 bits and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL(op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
7999 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8001 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8011 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8013 return (regno
<= LAST_LO_REGNUM
8014 || regno
> LAST_VIRTUAL_REGISTER
8015 || regno
== FRAME_POINTER_REGNUM
8016 || (GET_MODE_SIZE (mode
) >= 4
8017 && (regno
== STACK_POINTER_REGNUM
8018 || regno
>= FIRST_PSEUDO_REGISTER
8019 || x
== hard_frame_pointer_rtx
8020 || x
== arg_pointer_rtx
)));
8023 /* Return nonzero if x is a legitimate index register. This is the case
8024 for any base register that can access a QImode object. */
8026 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8028 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8031 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8033 The AP may be eliminated to either the SP or the FP, so we use the
8034 least common denominator, e.g. SImode, and offsets from 0 to 64.
8036 ??? Verify whether the above is the right approach.
8038 ??? Also, the FP may be eliminated to the SP, so perhaps that
8039 needs special handling also.
8041 ??? Look at how the mips16 port solves this problem. It probably uses
8042 better ways to solve some of these problems.
8044 Although it is not incorrect, we don't accept QImode and HImode
8045 addresses based on the frame pointer or arg pointer until the
8046 reload pass starts. This is so that eliminating such addresses
8047 into stack based ones won't produce impossible code. */
8049 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8051 /* ??? Not clear if this is right. Experiment. */
8052 if (GET_MODE_SIZE (mode
) < 4
8053 && !(reload_in_progress
|| reload_completed
)
8054 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8055 || reg_mentioned_p (arg_pointer_rtx
, x
)
8056 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8057 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8058 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8059 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8062 /* Accept any base register. SP only in SImode or larger. */
8063 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8066 /* This is PC relative data before arm_reorg runs. */
8067 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8068 && GET_CODE (x
) == SYMBOL_REF
8069 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8072 /* This is PC relative data after arm_reorg runs. */
8073 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8075 && (GET_CODE (x
) == LABEL_REF
8076 || (GET_CODE (x
) == CONST
8077 && GET_CODE (XEXP (x
, 0)) == PLUS
8078 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8079 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8082 /* Post-inc indexing only supported for SImode and larger. */
8083 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8084 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8087 else if (GET_CODE (x
) == PLUS
)
8089 /* REG+REG address can be any two index registers. */
8090 /* We disallow FRAME+REG addressing since we know that FRAME
8091 will be replaced with STACK, and SP relative addressing only
8092 permits SP+OFFSET. */
8093 if (GET_MODE_SIZE (mode
) <= 4
8094 && XEXP (x
, 0) != frame_pointer_rtx
8095 && XEXP (x
, 1) != frame_pointer_rtx
8096 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8097 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8098 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8101 /* REG+const has 5-7 bit offset for non-SP registers. */
8102 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8103 || XEXP (x
, 0) == arg_pointer_rtx
)
8104 && CONST_INT_P (XEXP (x
, 1))
8105 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8108 /* REG+const has 10-bit offset for SP, but only SImode and
8109 larger is supported. */
8110 /* ??? Should probably check for DI/DFmode overflow here
8111 just like GO_IF_LEGITIMATE_OFFSET does. */
8112 else if (REG_P (XEXP (x
, 0))
8113 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8114 && GET_MODE_SIZE (mode
) >= 4
8115 && CONST_INT_P (XEXP (x
, 1))
8116 && INTVAL (XEXP (x
, 1)) >= 0
8117 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8118 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8121 else if (REG_P (XEXP (x
, 0))
8122 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8123 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8124 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8125 && REGNO (XEXP (x
, 0))
8126 <= LAST_VIRTUAL_POINTER_REGISTER
))
8127 && GET_MODE_SIZE (mode
) >= 4
8128 && CONST_INT_P (XEXP (x
, 1))
8129 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8133 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8134 && GET_MODE_SIZE (mode
) == 4
8135 && GET_CODE (x
) == SYMBOL_REF
8136 && CONSTANT_POOL_ADDRESS_P (x
)
8138 && symbol_mentioned_p (get_pool_constant (x
))
8139 && ! pcrel_constant_p (get_pool_constant (x
))))
8145 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8146 instruction of mode MODE. */
8148 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8150 switch (GET_MODE_SIZE (mode
))
8153 return val
>= 0 && val
< 32;
8156 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8160 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8166 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8169 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8170 else if (TARGET_THUMB2
)
8171 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8172 else /* if (TARGET_THUMB1) */
8173 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8176 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8178 Given an rtx X being reloaded into a reg required to be
8179 in class CLASS, return the class of reg to actually use.
8180 In general this is just CLASS, but for the Thumb core registers and
8181 immediate constants we prefer a LO_REGS class or a subset. */
8184 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8190 if (rclass
== GENERAL_REGS
)
8197 /* Build the SYMBOL_REF for __tls_get_addr. */
8199 static GTY(()) rtx tls_get_addr_libfunc
;
8202 get_tls_get_addr (void)
8204 if (!tls_get_addr_libfunc
)
8205 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8206 return tls_get_addr_libfunc
;
8210 arm_load_tp (rtx target
)
8213 target
= gen_reg_rtx (SImode
);
8217 /* Can return in any reg. */
8218 emit_insn (gen_load_tp_hard (target
));
8222 /* Always returned in r0. Immediately copy the result into a pseudo,
8223 otherwise other uses of r0 (e.g. setting up function arguments) may
8224 clobber the value. */
8228 emit_insn (gen_load_tp_soft ());
8230 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8231 emit_move_insn (target
, tmp
);
8237 load_tls_operand (rtx x
, rtx reg
)
8241 if (reg
== NULL_RTX
)
8242 reg
= gen_reg_rtx (SImode
);
8244 tmp
= gen_rtx_CONST (SImode
, x
);
8246 emit_move_insn (reg
, tmp
);
8252 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8254 rtx label
, labelno
, sum
;
8256 gcc_assert (reloc
!= TLS_DESCSEQ
);
8259 labelno
= GEN_INT (pic_labelno
++);
8260 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8261 label
= gen_rtx_CONST (VOIDmode
, label
);
8263 sum
= gen_rtx_UNSPEC (Pmode
,
8264 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8265 GEN_INT (TARGET_ARM
? 8 : 4)),
8267 reg
= load_tls_operand (sum
, reg
);
8270 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8272 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8274 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8275 LCT_PURE
, /* LCT_CONST? */
8276 Pmode
, 1, reg
, Pmode
);
8278 rtx_insn
*insns
= get_insns ();
8285 arm_tls_descseq_addr (rtx x
, rtx reg
)
8287 rtx labelno
= GEN_INT (pic_labelno
++);
8288 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8289 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8290 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8291 gen_rtx_CONST (VOIDmode
, label
),
8292 GEN_INT (!TARGET_ARM
)),
8294 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8296 emit_insn (gen_tlscall (x
, labelno
));
8298 reg
= gen_reg_rtx (SImode
);
8300 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8302 emit_move_insn (reg
, reg0
);
8308 legitimize_tls_address (rtx x
, rtx reg
)
8310 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8312 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8316 case TLS_MODEL_GLOBAL_DYNAMIC
:
8317 if (TARGET_GNU2_TLS
)
8319 reg
= arm_tls_descseq_addr (x
, reg
);
8321 tp
= arm_load_tp (NULL_RTX
);
8323 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8327 /* Original scheme */
8328 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8329 dest
= gen_reg_rtx (Pmode
);
8330 emit_libcall_block (insns
, dest
, ret
, x
);
8334 case TLS_MODEL_LOCAL_DYNAMIC
:
8335 if (TARGET_GNU2_TLS
)
8337 reg
= arm_tls_descseq_addr (x
, reg
);
8339 tp
= arm_load_tp (NULL_RTX
);
8341 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8345 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8347 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8348 share the LDM result with other LD model accesses. */
8349 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8351 dest
= gen_reg_rtx (Pmode
);
8352 emit_libcall_block (insns
, dest
, ret
, eqv
);
8354 /* Load the addend. */
8355 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8356 GEN_INT (TLS_LDO32
)),
8358 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8359 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8363 case TLS_MODEL_INITIAL_EXEC
:
8364 labelno
= GEN_INT (pic_labelno
++);
8365 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8366 label
= gen_rtx_CONST (VOIDmode
, label
);
8367 sum
= gen_rtx_UNSPEC (Pmode
,
8368 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8369 GEN_INT (TARGET_ARM
? 8 : 4)),
8371 reg
= load_tls_operand (sum
, reg
);
8374 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8375 else if (TARGET_THUMB2
)
8376 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8379 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8380 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8383 tp
= arm_load_tp (NULL_RTX
);
8385 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8387 case TLS_MODEL_LOCAL_EXEC
:
8388 tp
= arm_load_tp (NULL_RTX
);
8390 reg
= gen_rtx_UNSPEC (Pmode
,
8391 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8393 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8395 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8402 /* Try machine-dependent ways of modifying an illegitimate address
8403 to be legitimate. If we find one, return the new, valid address. */
8405 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8407 if (arm_tls_referenced_p (x
))
8411 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8413 addend
= XEXP (XEXP (x
, 0), 1);
8414 x
= XEXP (XEXP (x
, 0), 0);
8417 if (GET_CODE (x
) != SYMBOL_REF
)
8420 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8422 x
= legitimize_tls_address (x
, NULL_RTX
);
8426 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8435 /* TODO: legitimize_address for Thumb2. */
8438 return thumb_legitimize_address (x
, orig_x
, mode
);
8441 if (GET_CODE (x
) == PLUS
)
8443 rtx xop0
= XEXP (x
, 0);
8444 rtx xop1
= XEXP (x
, 1);
8446 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8447 xop0
= force_reg (SImode
, xop0
);
8449 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8450 && !symbol_mentioned_p (xop1
))
8451 xop1
= force_reg (SImode
, xop1
);
8453 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8454 && CONST_INT_P (xop1
))
8456 HOST_WIDE_INT n
, low_n
;
8460 /* VFP addressing modes actually allow greater offsets, but for
8461 now we just stick with the lowest common denominator. */
8462 if (mode
== DImode
|| mode
== DFmode
)
8474 low_n
= ((mode
) == TImode
? 0
8475 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8479 base_reg
= gen_reg_rtx (SImode
);
8480 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8481 emit_move_insn (base_reg
, val
);
8482 x
= plus_constant (Pmode
, base_reg
, low_n
);
8484 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8485 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8488 /* XXX We don't allow MINUS any more -- see comment in
8489 arm_legitimate_address_outer_p (). */
8490 else if (GET_CODE (x
) == MINUS
)
8492 rtx xop0
= XEXP (x
, 0);
8493 rtx xop1
= XEXP (x
, 1);
8495 if (CONSTANT_P (xop0
))
8496 xop0
= force_reg (SImode
, xop0
);
8498 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8499 xop1
= force_reg (SImode
, xop1
);
8501 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8502 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8505 /* Make sure to take full advantage of the pre-indexed addressing mode
8506 with absolute addresses which often allows for the base register to
8507 be factorized for multiple adjacent memory references, and it might
8508 even allows for the mini pool to be avoided entirely. */
8509 else if (CONST_INT_P (x
) && optimize
> 0)
8512 HOST_WIDE_INT mask
, base
, index
;
8515 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8516 use a 8-bit index. So let's use a 12-bit index for SImode only and
8517 hope that arm_gen_constant will enable ldrb to use more bits. */
8518 bits
= (mode
== SImode
) ? 12 : 8;
8519 mask
= (1 << bits
) - 1;
8520 base
= INTVAL (x
) & ~mask
;
8521 index
= INTVAL (x
) & mask
;
8522 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8524 /* It'll most probably be more efficient to generate the base
8525 with more bits set and use a negative index instead. */
8529 base_reg
= force_reg (SImode
, GEN_INT (base
));
8530 x
= plus_constant (Pmode
, base_reg
, index
);
8535 /* We need to find and carefully transform any SYMBOL and LABEL
8536 references; so go back to the original address expression. */
8537 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8539 if (new_x
!= orig_x
)
8547 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8548 to be legitimate. If we find one, return the new, valid address. */
8550 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8552 if (GET_CODE (x
) == PLUS
8553 && CONST_INT_P (XEXP (x
, 1))
8554 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8555 || INTVAL (XEXP (x
, 1)) < 0))
8557 rtx xop0
= XEXP (x
, 0);
8558 rtx xop1
= XEXP (x
, 1);
8559 HOST_WIDE_INT offset
= INTVAL (xop1
);
8561 /* Try and fold the offset into a biasing of the base register and
8562 then offsetting that. Don't do this when optimizing for space
8563 since it can cause too many CSEs. */
8564 if (optimize_size
&& offset
>= 0
8565 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8567 HOST_WIDE_INT delta
;
8570 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8571 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8572 delta
= 31 * GET_MODE_SIZE (mode
);
8574 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8576 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8578 x
= plus_constant (Pmode
, xop0
, delta
);
8580 else if (offset
< 0 && offset
> -256)
8581 /* Small negative offsets are best done with a subtract before the
8582 dereference, forcing these into a register normally takes two
8584 x
= force_operand (x
, NULL_RTX
);
8587 /* For the remaining cases, force the constant into a register. */
8588 xop1
= force_reg (SImode
, xop1
);
8589 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8592 else if (GET_CODE (x
) == PLUS
8593 && s_register_operand (XEXP (x
, 1), SImode
)
8594 && !s_register_operand (XEXP (x
, 0), SImode
))
8596 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8598 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8603 /* We need to find and carefully transform any SYMBOL and LABEL
8604 references; so go back to the original address expression. */
8605 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8607 if (new_x
!= orig_x
)
8614 /* Return TRUE if X contains any TLS symbol references. */
8617 arm_tls_referenced_p (rtx x
)
8619 if (! TARGET_HAVE_TLS
)
8622 subrtx_iterator::array_type array
;
8623 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8625 const_rtx x
= *iter
;
8626 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8629 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8630 TLS offsets, not real symbol references. */
8631 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8632 iter
.skip_subrtxes ();
8637 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8639 On the ARM, allow any integer (invalid ones are removed later by insn
8640 patterns), nice doubles and symbol_refs which refer to the function's
8643 When generating pic allow anything. */
8646 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8648 return flag_pic
|| !label_mentioned_p (x
);
8652 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8654 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8655 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8656 for ARMv8-M Baseline or later the result is valid. */
8657 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8660 return (CONST_INT_P (x
)
8661 || CONST_DOUBLE_P (x
)
8662 || CONSTANT_ADDRESS_P (x
)
8667 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8669 return (!arm_cannot_force_const_mem (mode
, x
)
8671 ? arm_legitimate_constant_p_1 (mode
, x
)
8672 : thumb_legitimate_constant_p (mode
, x
)));
8675 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8678 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8682 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8684 split_const (x
, &base
, &offset
);
8685 if (GET_CODE (base
) == SYMBOL_REF
8686 && !offset_within_block_p (base
, INTVAL (offset
)))
8689 return arm_tls_referenced_p (x
);
8692 #define REG_OR_SUBREG_REG(X) \
8694 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8696 #define REG_OR_SUBREG_RTX(X) \
8697 (REG_P (X) ? (X) : SUBREG_REG (X))
8700 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8702 machine_mode mode
= GET_MODE (x
);
8711 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8718 return COSTS_N_INSNS (1);
8721 if (CONST_INT_P (XEXP (x
, 1)))
8724 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8731 return COSTS_N_INSNS (2) + cycles
;
8733 return COSTS_N_INSNS (1) + 16;
8736 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8738 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8739 return (COSTS_N_INSNS (words
)
8740 + 4 * ((MEM_P (SET_SRC (x
)))
8741 + MEM_P (SET_DEST (x
))));
8746 if (UINTVAL (x
) < 256
8747 /* 16-bit constant. */
8748 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8750 if (thumb_shiftable_const (INTVAL (x
)))
8751 return COSTS_N_INSNS (2);
8752 return COSTS_N_INSNS (3);
8754 else if ((outer
== PLUS
|| outer
== COMPARE
)
8755 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8757 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8758 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8759 return COSTS_N_INSNS (1);
8760 else if (outer
== AND
)
8763 /* This duplicates the tests in the andsi3 expander. */
8764 for (i
= 9; i
<= 31; i
++)
8765 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8766 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8767 return COSTS_N_INSNS (2);
8769 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8770 || outer
== LSHIFTRT
)
8772 return COSTS_N_INSNS (2);
8778 return COSTS_N_INSNS (3);
8796 /* XXX another guess. */
8797 /* Memory costs quite a lot for the first word, but subsequent words
8798 load at the equivalent of a single insn each. */
8799 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8800 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8805 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8811 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8812 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8818 return total
+ COSTS_N_INSNS (1);
8820 /* Assume a two-shift sequence. Increase the cost slightly so
8821 we prefer actual shifts over an extend operation. */
8822 return total
+ 1 + COSTS_N_INSNS (2);
8829 /* Estimates the size cost of thumb1 instructions.
8830 For now most of the code is copied from thumb1_rtx_costs. We need more
8831 fine grain tuning when we have more related test cases. */
8833 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8835 machine_mode mode
= GET_MODE (x
);
8844 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8848 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8849 defined by RTL expansion, especially for the expansion of
8851 if ((GET_CODE (XEXP (x
, 0)) == MULT
8852 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8853 || (GET_CODE (XEXP (x
, 1)) == MULT
8854 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8855 return COSTS_N_INSNS (2);
8860 return COSTS_N_INSNS (1);
8863 if (CONST_INT_P (XEXP (x
, 1)))
8865 /* Thumb1 mul instruction can't operate on const. We must Load it
8866 into a register first. */
8867 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8868 /* For the targets which have a very small and high-latency multiply
8869 unit, we prefer to synthesize the mult with up to 5 instructions,
8870 giving a good balance between size and performance. */
8871 if (arm_arch6m
&& arm_m_profile_small_mul
)
8872 return COSTS_N_INSNS (5);
8874 return COSTS_N_INSNS (1) + const_size
;
8876 return COSTS_N_INSNS (1);
8879 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8881 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8882 cost
= COSTS_N_INSNS (words
);
8883 if (satisfies_constraint_J (SET_SRC (x
))
8884 || satisfies_constraint_K (SET_SRC (x
))
8885 /* Too big an immediate for a 2-byte mov, using MOVT. */
8886 || (CONST_INT_P (SET_SRC (x
))
8887 && UINTVAL (SET_SRC (x
)) >= 256
8889 && satisfies_constraint_j (SET_SRC (x
)))
8890 /* thumb1_movdi_insn. */
8891 || ((words
> 1) && MEM_P (SET_SRC (x
))))
8892 cost
+= COSTS_N_INSNS (1);
8898 if (UINTVAL (x
) < 256)
8899 return COSTS_N_INSNS (1);
8900 /* movw is 4byte long. */
8901 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
8902 return COSTS_N_INSNS (2);
8903 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8904 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8905 return COSTS_N_INSNS (2);
8906 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8907 if (thumb_shiftable_const (INTVAL (x
)))
8908 return COSTS_N_INSNS (2);
8909 return COSTS_N_INSNS (3);
8911 else if ((outer
== PLUS
|| outer
== COMPARE
)
8912 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8914 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8915 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8916 return COSTS_N_INSNS (1);
8917 else if (outer
== AND
)
8920 /* This duplicates the tests in the andsi3 expander. */
8921 for (i
= 9; i
<= 31; i
++)
8922 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8923 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8924 return COSTS_N_INSNS (2);
8926 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8927 || outer
== LSHIFTRT
)
8929 return COSTS_N_INSNS (2);
8935 return COSTS_N_INSNS (3);
8949 return COSTS_N_INSNS (1);
8952 return (COSTS_N_INSNS (1)
8954 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8955 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8956 ? COSTS_N_INSNS (1) : 0));
8960 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8965 /* XXX still guessing. */
8966 switch (GET_MODE (XEXP (x
, 0)))
8969 return (1 + (mode
== DImode
? 4 : 0)
8970 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8973 return (4 + (mode
== DImode
? 4 : 0)
8974 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8977 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8988 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8989 operand, then return the operand that is being shifted. If the shift
8990 is not by a constant, then set SHIFT_REG to point to the operand.
8991 Return NULL if OP is not a shifter operand. */
8993 shifter_op_p (rtx op
, rtx
*shift_reg
)
8995 enum rtx_code code
= GET_CODE (op
);
8997 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
8998 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
8999 return XEXP (op
, 0);
9000 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9001 return XEXP (op
, 0);
9002 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9003 || code
== ASHIFTRT
)
9005 if (!CONST_INT_P (XEXP (op
, 1)))
9006 *shift_reg
= XEXP (op
, 1);
9007 return XEXP (op
, 0);
9014 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9016 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9017 rtx_code code
= GET_CODE (x
);
9018 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9020 switch (XINT (x
, 1))
9022 case UNSPEC_UNALIGNED_LOAD
:
9023 /* We can only do unaligned loads into the integer unit, and we can't
9025 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9027 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9028 + extra_cost
->ldst
.load_unaligned
);
9031 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9032 ADDR_SPACE_GENERIC
, speed_p
);
9036 case UNSPEC_UNALIGNED_STORE
:
9037 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9039 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9040 + extra_cost
->ldst
.store_unaligned
);
9042 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9044 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9045 ADDR_SPACE_GENERIC
, speed_p
);
9056 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9060 *cost
= COSTS_N_INSNS (2);
9066 /* Cost of a libcall. We assume one insn per argument, an amount for the
9067 call (one insn for -Os) and then one for processing the result. */
9068 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9070 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9073 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9074 if (shift_op != NULL \
9075 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9080 *cost += extra_cost->alu.arith_shift_reg; \
9081 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9082 ASHIFT, 1, speed_p); \
9085 *cost += extra_cost->alu.arith_shift; \
9087 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9088 ASHIFT, 0, speed_p) \
9089 + rtx_cost (XEXP (x, 1 - IDX), \
9090 GET_MODE (shift_op), \
9097 /* RTX costs. Make an estimate of the cost of executing the operation
9098 X, which is contained with an operation with code OUTER_CODE.
9099 SPEED_P indicates whether the cost desired is the performance cost,
9100 or the size cost. The estimate is stored in COST and the return
9101 value is TRUE if the cost calculation is final, or FALSE if the
9102 caller should recurse through the operands of X to add additional
9105 We currently make no attempt to model the size savings of Thumb-2
9106 16-bit instructions. At the normal points in compilation where
9107 this code is called we have no measure of whether the condition
9108 flags are live or not, and thus no realistic way to determine what
9109 the size will eventually be. */
9111 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9112 const struct cpu_cost_table
*extra_cost
,
9113 int *cost
, bool speed_p
)
9115 machine_mode mode
= GET_MODE (x
);
9117 *cost
= COSTS_N_INSNS (1);
9122 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9124 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9132 /* SET RTXs don't have a mode so we get it from the destination. */
9133 mode
= GET_MODE (SET_DEST (x
));
9135 if (REG_P (SET_SRC (x
))
9136 && REG_P (SET_DEST (x
)))
9138 /* Assume that most copies can be done with a single insn,
9139 unless we don't have HW FP, in which case everything
9140 larger than word mode will require two insns. */
9141 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9142 && GET_MODE_SIZE (mode
) > 4)
9145 /* Conditional register moves can be encoded
9146 in 16 bits in Thumb mode. */
9147 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9153 if (CONST_INT_P (SET_SRC (x
)))
9155 /* Handle CONST_INT here, since the value doesn't have a mode
9156 and we would otherwise be unable to work out the true cost. */
9157 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9160 /* Slightly lower the cost of setting a core reg to a constant.
9161 This helps break up chains and allows for better scheduling. */
9162 if (REG_P (SET_DEST (x
))
9163 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9166 /* Immediate moves with an immediate in the range [0, 255] can be
9167 encoded in 16 bits in Thumb mode. */
9168 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9169 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9171 goto const_int_cost
;
9177 /* A memory access costs 1 insn if the mode is small, or the address is
9178 a single register, otherwise it costs one insn per word. */
9179 if (REG_P (XEXP (x
, 0)))
9180 *cost
= COSTS_N_INSNS (1);
9182 && GET_CODE (XEXP (x
, 0)) == PLUS
9183 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9184 /* This will be split into two instructions.
9185 See arm.md:calculate_pic_address. */
9186 *cost
= COSTS_N_INSNS (2);
9188 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9190 /* For speed optimizations, add the costs of the address and
9191 accessing memory. */
9194 *cost
+= (extra_cost
->ldst
.load
9195 + arm_address_cost (XEXP (x
, 0), mode
,
9196 ADDR_SPACE_GENERIC
, speed_p
));
9198 *cost
+= extra_cost
->ldst
.load
;
9204 /* Calculations of LDM costs are complex. We assume an initial cost
9205 (ldm_1st) which will load the number of registers mentioned in
9206 ldm_regs_per_insn_1st registers; then each additional
9207 ldm_regs_per_insn_subsequent registers cost one more insn. The
9208 formula for N regs is thus:
9210 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9211 + ldm_regs_per_insn_subsequent - 1)
9212 / ldm_regs_per_insn_subsequent).
9214 Additional costs may also be added for addressing. A similar
9215 formula is used for STM. */
9217 bool is_ldm
= load_multiple_operation (x
, SImode
);
9218 bool is_stm
= store_multiple_operation (x
, SImode
);
9220 if (is_ldm
|| is_stm
)
9224 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9225 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9226 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9227 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9228 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9229 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9230 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9232 *cost
+= regs_per_insn_1st
9233 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9234 + regs_per_insn_sub
- 1)
9235 / regs_per_insn_sub
);
9244 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9245 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9246 *cost
+= COSTS_N_INSNS (speed_p
9247 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9248 else if (mode
== SImode
&& TARGET_IDIV
)
9249 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9251 *cost
= LIBCALL_COST (2);
9252 return false; /* All arguments must be in registers. */
9255 /* MOD by a power of 2 can be expanded as:
9257 and r0, r0, #(n - 1)
9258 and r1, r1, #(n - 1)
9259 rsbpl r0, r1, #0. */
9260 if (CONST_INT_P (XEXP (x
, 1))
9261 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9264 *cost
+= COSTS_N_INSNS (3);
9267 *cost
+= 2 * extra_cost
->alu
.logical
9268 + extra_cost
->alu
.arith
;
9274 *cost
= LIBCALL_COST (2);
9275 return false; /* All arguments must be in registers. */
9278 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9280 *cost
+= (COSTS_N_INSNS (1)
9281 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9283 *cost
+= extra_cost
->alu
.shift_reg
;
9291 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9293 *cost
+= (COSTS_N_INSNS (2)
9294 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9296 *cost
+= 2 * extra_cost
->alu
.shift
;
9299 else if (mode
== SImode
)
9301 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9302 /* Slightly disparage register shifts at -Os, but not by much. */
9303 if (!CONST_INT_P (XEXP (x
, 1)))
9304 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9305 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9308 else if (GET_MODE_CLASS (mode
) == MODE_INT
9309 && GET_MODE_SIZE (mode
) < 4)
9313 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9314 /* Slightly disparage register shifts at -Os, but not by
9316 if (!CONST_INT_P (XEXP (x
, 1)))
9317 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9318 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9320 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9322 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9324 /* Can use SBFX/UBFX. */
9326 *cost
+= extra_cost
->alu
.bfx
;
9327 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9331 *cost
+= COSTS_N_INSNS (1);
9332 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9335 if (CONST_INT_P (XEXP (x
, 1)))
9336 *cost
+= 2 * extra_cost
->alu
.shift
;
9338 *cost
+= (extra_cost
->alu
.shift
9339 + extra_cost
->alu
.shift_reg
);
9342 /* Slightly disparage register shifts. */
9343 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9348 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9349 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9352 if (CONST_INT_P (XEXP (x
, 1)))
9353 *cost
+= (2 * extra_cost
->alu
.shift
9354 + extra_cost
->alu
.log_shift
);
9356 *cost
+= (extra_cost
->alu
.shift
9357 + extra_cost
->alu
.shift_reg
9358 + extra_cost
->alu
.log_shift_reg
);
9364 *cost
= LIBCALL_COST (2);
9373 *cost
+= extra_cost
->alu
.rev
;
9380 /* No rev instruction available. Look at arm_legacy_rev
9381 and thumb_legacy_rev for the form of RTL used then. */
9384 *cost
+= COSTS_N_INSNS (9);
9388 *cost
+= 6 * extra_cost
->alu
.shift
;
9389 *cost
+= 3 * extra_cost
->alu
.logical
;
9394 *cost
+= COSTS_N_INSNS (4);
9398 *cost
+= 2 * extra_cost
->alu
.shift
;
9399 *cost
+= extra_cost
->alu
.arith_shift
;
9400 *cost
+= 2 * extra_cost
->alu
.logical
;
9408 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9409 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9411 if (GET_CODE (XEXP (x
, 0)) == MULT
9412 || GET_CODE (XEXP (x
, 1)) == MULT
)
9414 rtx mul_op0
, mul_op1
, sub_op
;
9417 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9419 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9421 mul_op0
= XEXP (XEXP (x
, 0), 0);
9422 mul_op1
= XEXP (XEXP (x
, 0), 1);
9423 sub_op
= XEXP (x
, 1);
9427 mul_op0
= XEXP (XEXP (x
, 1), 0);
9428 mul_op1
= XEXP (XEXP (x
, 1), 1);
9429 sub_op
= XEXP (x
, 0);
9432 /* The first operand of the multiply may be optionally
9434 if (GET_CODE (mul_op0
) == NEG
)
9435 mul_op0
= XEXP (mul_op0
, 0);
9437 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9438 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9439 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9445 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9451 rtx shift_by_reg
= NULL
;
9455 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9456 if (shift_op
== NULL
)
9458 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9459 non_shift_op
= XEXP (x
, 0);
9462 non_shift_op
= XEXP (x
, 1);
9464 if (shift_op
!= NULL
)
9466 if (shift_by_reg
!= NULL
)
9469 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9470 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9473 *cost
+= extra_cost
->alu
.arith_shift
;
9475 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9476 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9481 && GET_CODE (XEXP (x
, 1)) == MULT
)
9485 *cost
+= extra_cost
->mult
[0].add
;
9486 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9487 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9488 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9492 if (CONST_INT_P (XEXP (x
, 0)))
9494 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9495 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9497 *cost
= COSTS_N_INSNS (insns
);
9499 *cost
+= insns
* extra_cost
->alu
.arith
;
9500 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9504 *cost
+= extra_cost
->alu
.arith
;
9509 if (GET_MODE_CLASS (mode
) == MODE_INT
9510 && GET_MODE_SIZE (mode
) < 4)
9512 rtx shift_op
, shift_reg
;
9515 /* We check both sides of the MINUS for shifter operands since,
9516 unlike PLUS, it's not commutative. */
9518 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9519 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9521 /* Slightly disparage, as we might need to widen the result. */
9524 *cost
+= extra_cost
->alu
.arith
;
9526 if (CONST_INT_P (XEXP (x
, 0)))
9528 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9537 *cost
+= COSTS_N_INSNS (1);
9539 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9541 rtx op1
= XEXP (x
, 1);
9544 *cost
+= 2 * extra_cost
->alu
.arith
;
9546 if (GET_CODE (op1
) == ZERO_EXTEND
)
9547 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9550 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9551 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9555 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9558 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9559 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9561 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9564 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9565 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9568 *cost
+= (extra_cost
->alu
.arith
9569 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9570 ? extra_cost
->alu
.arith
9571 : extra_cost
->alu
.arith_shift
));
9572 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9573 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9574 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9579 *cost
+= 2 * extra_cost
->alu
.arith
;
9585 *cost
= LIBCALL_COST (2);
9589 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9590 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9592 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9594 rtx mul_op0
, mul_op1
, add_op
;
9597 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9599 mul_op0
= XEXP (XEXP (x
, 0), 0);
9600 mul_op1
= XEXP (XEXP (x
, 0), 1);
9601 add_op
= XEXP (x
, 1);
9603 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9604 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9605 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9611 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9614 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9616 *cost
= LIBCALL_COST (2);
9620 /* Narrow modes can be synthesized in SImode, but the range
9621 of useful sub-operations is limited. Check for shift operations
9622 on one of the operands. Only left shifts can be used in the
9624 if (GET_MODE_CLASS (mode
) == MODE_INT
9625 && GET_MODE_SIZE (mode
) < 4)
9627 rtx shift_op
, shift_reg
;
9630 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9632 if (CONST_INT_P (XEXP (x
, 1)))
9634 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9635 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9637 *cost
= COSTS_N_INSNS (insns
);
9639 *cost
+= insns
* extra_cost
->alu
.arith
;
9640 /* Slightly penalize a narrow operation as the result may
9642 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9646 /* Slightly penalize a narrow operation as the result may
9650 *cost
+= extra_cost
->alu
.arith
;
9657 rtx shift_op
, shift_reg
;
9660 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9661 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9663 /* UXTA[BH] or SXTA[BH]. */
9665 *cost
+= extra_cost
->alu
.extend_arith
;
9666 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9668 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9673 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9674 if (shift_op
!= NULL
)
9679 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9680 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9683 *cost
+= extra_cost
->alu
.arith_shift
;
9685 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9686 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9689 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9691 rtx mul_op
= XEXP (x
, 0);
9693 if (TARGET_DSP_MULTIPLY
9694 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9695 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9696 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9697 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9698 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9699 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9700 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9701 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9702 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9703 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9704 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9705 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9710 *cost
+= extra_cost
->mult
[0].extend_add
;
9711 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9712 SIGN_EXTEND
, 0, speed_p
)
9713 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9714 SIGN_EXTEND
, 0, speed_p
)
9715 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9720 *cost
+= extra_cost
->mult
[0].add
;
9721 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9722 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9723 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9726 if (CONST_INT_P (XEXP (x
, 1)))
9728 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9729 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9731 *cost
= COSTS_N_INSNS (insns
);
9733 *cost
+= insns
* extra_cost
->alu
.arith
;
9734 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9738 *cost
+= extra_cost
->alu
.arith
;
9746 && GET_CODE (XEXP (x
, 0)) == MULT
9747 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9748 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9749 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9750 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9753 *cost
+= extra_cost
->mult
[1].extend_add
;
9754 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9755 ZERO_EXTEND
, 0, speed_p
)
9756 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9757 ZERO_EXTEND
, 0, speed_p
)
9758 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9762 *cost
+= COSTS_N_INSNS (1);
9764 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9765 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9768 *cost
+= (extra_cost
->alu
.arith
9769 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9770 ? extra_cost
->alu
.arith
9771 : extra_cost
->alu
.arith_shift
));
9773 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9775 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9780 *cost
+= 2 * extra_cost
->alu
.arith
;
9785 *cost
= LIBCALL_COST (2);
9788 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9791 *cost
+= extra_cost
->alu
.rev
;
9799 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9800 rtx op0
= XEXP (x
, 0);
9801 rtx shift_op
, shift_reg
;
9805 || (code
== IOR
&& TARGET_THUMB2
)))
9806 op0
= XEXP (op0
, 0);
9809 shift_op
= shifter_op_p (op0
, &shift_reg
);
9810 if (shift_op
!= NULL
)
9815 *cost
+= extra_cost
->alu
.log_shift_reg
;
9816 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9819 *cost
+= extra_cost
->alu
.log_shift
;
9821 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9822 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9826 if (CONST_INT_P (XEXP (x
, 1)))
9828 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9829 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9832 *cost
= COSTS_N_INSNS (insns
);
9834 *cost
+= insns
* extra_cost
->alu
.logical
;
9835 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9840 *cost
+= extra_cost
->alu
.logical
;
9841 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9842 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9848 rtx op0
= XEXP (x
, 0);
9849 enum rtx_code subcode
= GET_CODE (op0
);
9851 *cost
+= COSTS_N_INSNS (1);
9855 || (code
== IOR
&& TARGET_THUMB2
)))
9856 op0
= XEXP (op0
, 0);
9858 if (GET_CODE (op0
) == ZERO_EXTEND
)
9861 *cost
+= 2 * extra_cost
->alu
.logical
;
9863 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9865 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9868 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9871 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9873 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9875 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9880 *cost
+= 2 * extra_cost
->alu
.logical
;
9886 *cost
= LIBCALL_COST (2);
9890 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9891 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9893 rtx op0
= XEXP (x
, 0);
9895 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
9896 op0
= XEXP (op0
, 0);
9899 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9901 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
9902 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
9905 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9907 *cost
= LIBCALL_COST (2);
9913 if (TARGET_DSP_MULTIPLY
9914 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9915 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9916 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9917 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9918 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9919 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9920 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9921 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9922 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9923 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9924 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9925 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9930 *cost
+= extra_cost
->mult
[0].extend
;
9931 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
9932 SIGN_EXTEND
, 0, speed_p
);
9933 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
9934 SIGN_EXTEND
, 1, speed_p
);
9938 *cost
+= extra_cost
->mult
[0].simple
;
9945 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9946 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9947 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9948 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9951 *cost
+= extra_cost
->mult
[1].extend
;
9952 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
9953 ZERO_EXTEND
, 0, speed_p
)
9954 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9955 ZERO_EXTEND
, 0, speed_p
));
9959 *cost
= LIBCALL_COST (2);
9964 *cost
= LIBCALL_COST (2);
9968 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9969 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9971 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9974 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
9979 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9983 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9985 *cost
= LIBCALL_COST (1);
9991 if (GET_CODE (XEXP (x
, 0)) == ABS
)
9993 *cost
+= COSTS_N_INSNS (1);
9994 /* Assume the non-flag-changing variant. */
9996 *cost
+= (extra_cost
->alu
.log_shift
9997 + extra_cost
->alu
.arith_shift
);
9998 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10002 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10003 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10005 *cost
+= COSTS_N_INSNS (1);
10006 /* No extra cost for MOV imm and MVN imm. */
10007 /* If the comparison op is using the flags, there's no further
10008 cost, otherwise we need to add the cost of the comparison. */
10009 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10010 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10011 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10013 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10014 *cost
+= (COSTS_N_INSNS (1)
10015 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10017 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10020 *cost
+= extra_cost
->alu
.arith
;
10026 *cost
+= extra_cost
->alu
.arith
;
10030 if (GET_MODE_CLASS (mode
) == MODE_INT
10031 && GET_MODE_SIZE (mode
) < 4)
10033 /* Slightly disparage, as we might need an extend operation. */
10036 *cost
+= extra_cost
->alu
.arith
;
10040 if (mode
== DImode
)
10042 *cost
+= COSTS_N_INSNS (1);
10044 *cost
+= 2 * extra_cost
->alu
.arith
;
10049 *cost
= LIBCALL_COST (1);
10053 if (mode
== SImode
)
10056 rtx shift_reg
= NULL
;
10058 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10062 if (shift_reg
!= NULL
)
10065 *cost
+= extra_cost
->alu
.log_shift_reg
;
10066 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10069 *cost
+= extra_cost
->alu
.log_shift
;
10070 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10075 *cost
+= extra_cost
->alu
.logical
;
10078 if (mode
== DImode
)
10080 *cost
+= COSTS_N_INSNS (1);
10086 *cost
+= LIBCALL_COST (1);
10091 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10093 *cost
+= COSTS_N_INSNS (3);
10096 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10097 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10099 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10100 /* Assume that if one arm of the if_then_else is a register,
10101 that it will be tied with the result and eliminate the
10102 conditional insn. */
10103 if (REG_P (XEXP (x
, 1)))
10105 else if (REG_P (XEXP (x
, 2)))
10111 if (extra_cost
->alu
.non_exec_costs_exec
)
10112 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10114 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10117 *cost
+= op1cost
+ op2cost
;
10123 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10127 machine_mode op0mode
;
10128 /* We'll mostly assume that the cost of a compare is the cost of the
10129 LHS. However, there are some notable exceptions. */
10131 /* Floating point compares are never done as side-effects. */
10132 op0mode
= GET_MODE (XEXP (x
, 0));
10133 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10134 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10137 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10139 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10141 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10147 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10149 *cost
= LIBCALL_COST (2);
10153 /* DImode compares normally take two insns. */
10154 if (op0mode
== DImode
)
10156 *cost
+= COSTS_N_INSNS (1);
10158 *cost
+= 2 * extra_cost
->alu
.arith
;
10162 if (op0mode
== SImode
)
10167 if (XEXP (x
, 1) == const0_rtx
10168 && !(REG_P (XEXP (x
, 0))
10169 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10170 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10172 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10174 /* Multiply operations that set the flags are often
10175 significantly more expensive. */
10177 && GET_CODE (XEXP (x
, 0)) == MULT
10178 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10179 *cost
+= extra_cost
->mult
[0].flag_setting
;
10182 && GET_CODE (XEXP (x
, 0)) == PLUS
10183 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10184 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10186 *cost
+= extra_cost
->mult
[0].flag_setting
;
10191 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10192 if (shift_op
!= NULL
)
10194 if (shift_reg
!= NULL
)
10196 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10199 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10202 *cost
+= extra_cost
->alu
.arith_shift
;
10203 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10204 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10209 *cost
+= extra_cost
->alu
.arith
;
10210 if (CONST_INT_P (XEXP (x
, 1))
10211 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10213 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10221 *cost
= LIBCALL_COST (2);
10244 if (outer_code
== SET
)
10246 /* Is it a store-flag operation? */
10247 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10248 && XEXP (x
, 1) == const0_rtx
)
10250 /* Thumb also needs an IT insn. */
10251 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10254 if (XEXP (x
, 1) == const0_rtx
)
10259 /* LSR Rd, Rn, #31. */
10261 *cost
+= extra_cost
->alu
.shift
;
10271 *cost
+= COSTS_N_INSNS (1);
10275 /* RSBS T1, Rn, Rn, LSR #31
10277 *cost
+= COSTS_N_INSNS (1);
10279 *cost
+= extra_cost
->alu
.arith_shift
;
10283 /* RSB Rd, Rn, Rn, ASR #1
10284 LSR Rd, Rd, #31. */
10285 *cost
+= COSTS_N_INSNS (1);
10287 *cost
+= (extra_cost
->alu
.arith_shift
10288 + extra_cost
->alu
.shift
);
10294 *cost
+= COSTS_N_INSNS (1);
10296 *cost
+= extra_cost
->alu
.shift
;
10300 /* Remaining cases are either meaningless or would take
10301 three insns anyway. */
10302 *cost
= COSTS_N_INSNS (3);
10305 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10310 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10311 if (CONST_INT_P (XEXP (x
, 1))
10312 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10314 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10321 /* Not directly inside a set. If it involves the condition code
10322 register it must be the condition for a branch, cond_exec or
10323 I_T_E operation. Since the comparison is performed elsewhere
10324 this is just the control part which has no additional
10326 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10327 && XEXP (x
, 1) == const0_rtx
)
10335 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10336 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10339 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10343 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10345 *cost
= LIBCALL_COST (1);
10349 if (mode
== SImode
)
10352 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10356 *cost
= LIBCALL_COST (1);
10360 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10361 && MEM_P (XEXP (x
, 0)))
10363 if (mode
== DImode
)
10364 *cost
+= COSTS_N_INSNS (1);
10369 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10370 *cost
+= extra_cost
->ldst
.load
;
10372 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10374 if (mode
== DImode
)
10375 *cost
+= extra_cost
->alu
.shift
;
10380 /* Widening from less than 32-bits requires an extend operation. */
10381 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10383 /* We have SXTB/SXTH. */
10384 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10386 *cost
+= extra_cost
->alu
.extend
;
10388 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10390 /* Needs two shifts. */
10391 *cost
+= COSTS_N_INSNS (1);
10392 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10394 *cost
+= 2 * extra_cost
->alu
.shift
;
10397 /* Widening beyond 32-bits requires one more insn. */
10398 if (mode
== DImode
)
10400 *cost
+= COSTS_N_INSNS (1);
10402 *cost
+= extra_cost
->alu
.shift
;
           || GET_MODE (XEXP (x, 0)) == SImode
           || GET_MODE (XEXP (x, 0)) == QImode)
          && MEM_P (XEXP (x, 0)))
        *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
        if (mode == DImode)
          *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
      /* Widening from less than 32 bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
          /* UXTB can be a shorter instruction in Thumb2, but it might
             be slower than the AND Rd, Rn, #255 alternative.  When
             optimizing for speed it should never be slower to use
             AND, and we don't really model 16-bit vs 32-bit insns
             here.  */
          *cost += extra_cost->alu.logical;
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
          /* We have UXTB/UXTH.  */
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          *cost += extra_cost->alu.extend;
      else if (GET_MODE (XEXP (x, 0)) != SImode)
          /* Needs two shifts.  It's marginally preferable to use
             shifts rather than two BIC instructions as the second
             shift may merge with a subsequent insn as a shifter
             op.  */
          *cost = COSTS_N_INSNS (2);
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          *cost += 2 * extra_cost->alu.shift;
      /* Widening beyond 32 bits requires one more insn.  */
      if (mode == DImode)
          *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
      /* CONST_INT has no mode, so we cannot tell for sure how many
         insns are really going to be needed.  The best we can do is
         look at the value passed.  If it fits in SImode, then assume
         that's the mode it will be used for.  Otherwise assume it
         will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
            || outer_code == AND
            || outer_code == IOR
            || outer_code == XOR
            || outer_code == MINUS))
      if (mode == SImode)
          *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
                                                    INTVAL (x), NULL, NULL,
          *cost += COSTS_N_INSNS (arm_gen_constant
                                  (outer_code, SImode, NULL,
                                   trunc_int_for_mode (INTVAL (x), SImode),
                                  + arm_gen_constant (outer_code, SImode, NULL,
                                                      INTVAL (x) >> 32, NULL,
      if (arm_arch_thumb2 && !flag_pic)
        *cost += COSTS_N_INSNS (1);
      *cost += extra_cost->ldst.load;
      *cost += COSTS_N_INSNS (1);
      *cost += COSTS_N_INSNS (1);
      *cost += extra_cost->alu.arith;
      *cost = COSTS_N_INSNS (4);
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
          if (vfp3_const_double_rtx (x))
              *cost += extra_cost->fp[mode == DFmode].fpconst;
          if (mode == DFmode)
            *cost += extra_cost->ldst.loadd;
          *cost += extra_cost->ldst.loadf;
          *cost += COSTS_N_INSNS (1 + (mode == DFmode));
      *cost = COSTS_N_INSNS (4);
          && TARGET_HARD_FLOAT
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *cost = COSTS_N_INSNS (1);
      *cost = COSTS_N_INSNS (4);
      /* When optimizing for size, we prefer constant pool entries to
         MOVW/MOVT pairs, so bump the cost of these slightly.  */
      *cost += extra_cost->alu.clz;
      if (XEXP (x, 1) == const0_rtx)
          *cost += extra_cost->alu.log_shift;
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
      /* Fall through.  */
      *cost += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
          *cost += extra_cost->mult[1].extend;
          *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
                              ZERO_EXTEND, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
                                ZERO_EXTEND, 0, speed_p));
      *cost = LIBCALL_COST (1);

    case UNSPEC_VOLATILE:
      return arm_unspec_cost (x, outer_code, speed_p, cost);
      /* Reading the PC is like reading any other register.  Writing it
         is more expensive, but we take that into account elsewhere.  */
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
          && CONST_INT_P (XEXP (x, 1))
          && CONST_INT_P (XEXP (x, 2)))
          *cost += extra_cost->alu.bfx;
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      *cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      if (TARGET_HARD_FLOAT)
          *cost += extra_cost->fp[mode == DFmode].widen;
          if (!TARGET_FPU_ARMV8
              && GET_MODE (XEXP (x, 0)) == HFmode)
              /* Pre v8, widening HF->DF is a two-step process, first
                 widening to SFmode.  */
              *cost += COSTS_N_INSNS (1);
              *cost += extra_cost->fp[0].widen;
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
      *cost = LIBCALL_COST (1);
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
          *cost += extra_cost->fp[mode == DFmode].narrow;
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          /* Vector modes?  */
      *cost = LIBCALL_COST (1);
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          rtx op2 = XEXP (x, 2);

          /* vfms or vfnma.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          /* vfnms or vfnma.  */
          if (GET_CODE (op2) == NEG)
            op2 = XEXP (op2, 0);

          *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
          *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
          *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
          *cost += extra_cost->fp[mode == DFmode].fma;
      *cost = LIBCALL_COST (3);
      if (TARGET_HARD_FLOAT)
          /* The *combine_vcvtf2i reduces a vmul+vcvt into
             a vcvt fixed-point conversion.  */
          if (code == FIX && mode == SImode
              && GET_CODE (XEXP (x, 0)) == FIX
              && GET_MODE (XEXP (x, 0)) == SFmode
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
              *cost += extra_cost->fp[0].toint;
              *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
          if (GET_MODE_CLASS (mode) == MODE_INT)
              mode = GET_MODE (XEXP (x, 0));
              *cost += extra_cost->fp[mode == DFmode].toint;
              /* Strip off the 'cost' of rounding towards zero.  */
              if (GET_CODE (XEXP (x, 0)) == FIX)
                *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              /* ??? Increase the cost to deal with transferring from
                 FP -> CORE registers?  */
          else if (GET_MODE_CLASS (mode) == MODE_FLOAT
                   && TARGET_FPU_ARMV8)
              *cost += extra_cost->fp[mode == DFmode].roundint;
          /* Vector costs?  */
      *cost = LIBCALL_COST (1);
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
          /* ??? Increase the cost to deal with transferring from CORE
             -> FP registers?  */
          *cost += extra_cost->fp[mode == DFmode].fromint;
      *cost = LIBCALL_COST (1);
        /* Just a guess.  Guess number of instructions in the asm
           plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
           though (see PR60663).  */
        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
        *cost = COSTS_N_INSNS (asm_length + num_operands);
      if (mode != VOIDmode)
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      *cost = COSTS_N_INSNS (4); /* Who knows?  */
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs entry point.  */

arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
               int opno ATTRIBUTE_UNUSED, int *total, bool speed)
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
                                   (enum rtx_code) outer_code,
                                   current_tune->insn_extra_cost,

  if (dump_file && (dump_flags & TDF_DETAILS))
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */

arm_arm_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
      if (CONST_INT_P (XEXP (x, 1)))
      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
arm_thumb_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))

arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* Adjust cost hook for XScale.  */

xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT_IMM
              || attr_type == TYPE_ALUS_SHIFT_IMM
              || attr_type == TYPE_LOGIC_SHIFT_IMM
              || attr_type == TYPE_LOGICS_SHIFT_IMM
              || attr_type == TYPE_ALU_SHIFT_REG
              || attr_type == TYPE_ALUS_SHIFT_REG
              || attr_type == TYPE_LOGIC_SHIFT_REG
              || attr_type == TYPE_LOGICS_SHIFT_REG
              || attr_type == TYPE_MOV_SHIFT
              || attr_type == TYPE_MVN_SHIFT
              || attr_type == TYPE_MOV_SHIFT_REG
              || attr_type == TYPE_MVN_SHIFT_REG))
          rtx shifted_operand;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we must increase the
             cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints (dep);
          for (opno = 0; opno < recog_data.n_operands; opno++)
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
              if (reg_overlap_mentioned_p (recog_data.operand[opno],
/* Adjust cost hook for Cortex A9.  */

cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
    case REG_DEP_OUTPUT:
        if (recog_memoized (insn) >= 0
            && recog_memoized (dep) >= 0)
            if (GET_CODE (PATTERN (insn)) == SET)
                 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                enum attr_type attr_type_insn = get_attr_type (insn);
                enum attr_type attr_type_dep = get_attr_type (dep);

                /* By default all dependencies of the form
                   have an extra latency of 1 cycle because
                   of the input and output dependency in this
                   case.  However this gets modeled as a true
                   dependency and hence all these checks.  */
                if (REG_P (SET_DEST (PATTERN (insn)))
                    && reg_set_p (SET_DEST (PATTERN (insn)), dep))
                    /* FMACS is a special case where the dependent
                       instruction can be issued 3 cycles before
                       the normal latency in case of an output
                       dependency.  */
                    if ((attr_type_insn == TYPE_FMACS
                         || attr_type_insn == TYPE_FMACD)
                        && (attr_type_dep == TYPE_FMACS
                            || attr_type_dep == TYPE_FMACD))
                        if (dep_type == REG_DEP_OUTPUT)
                          *cost = insn_default_latency (dep) - 3;
                          *cost = insn_default_latency (dep);
                        if (dep_type == REG_DEP_OUTPUT)
                          *cost = insn_default_latency (dep) + 1;
                          *cost = insn_default_latency (dep);
      gcc_unreachable ();
/* Adjust cost hook for FA726TE.  */

fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
  /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
     followed by a predicated one) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                        reg_class_t from, reg_class_t to)
  if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
      || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
  else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
           || (from != IWMMXT_REGS && to == IWMMXT_REGS))
  else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
  if (from == HI_REGS || to == HI_REGS)
/* Implement TARGET_MEMORY_MOVE_COST.  */

arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
                      bool in ATTRIBUTE_UNUSED)
  if (GET_MODE_SIZE (mode) < 4)
  return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
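/* As an illustrative reading of the formula above (not from the original
   source): an SImode (4-byte) access classified as LO_REGS costs
   2 * 4 * 1 = 8, while the same access for any other register class
   costs 2 * 4 * 2 = 16.  */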
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */

arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                int misalign ATTRIBUTE_UNUSED)
  switch (type_of_cost)
      return current_tune->vec_costs->scalar_stmt_cost;
      return current_tune->vec_costs->scalar_load_cost;
      return current_tune->vec_costs->scalar_store_cost;
      return current_tune->vec_costs->vec_stmt_cost;
      return current_tune->vec_costs->vec_align_load_cost;
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

      gcc_unreachable ();
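/* For example (illustrative, not in the original source): constructing a
   four-element vector (TYPE_VECTOR_SUBPARTS == 4) element by element is
   costed as 4 / 2 + 1 = 3 units under the vec_construct rule above.  */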
/* Implement targetm.vectorize.add_stmt_cost.  */

arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                   struct _stmt_vec_info *stmt_info, int misalign,
                   enum vect_cost_model_location where)
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
/* Return true if and only if this insn can dual-issue only as older.  */

cortexa7_older_only (rtx_insn *insn)
  if (recog_memoized (insn) < 0)

  switch (get_attr_type (insn))
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
/* Return true if and only if this insn can dual-issue as younger.  */

cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
  if (recog_memoized (insn) < 0)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));

  switch (get_attr_type (insn))
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */

cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
                        int *n_readyp, int clock)
  int first_older_only = -1, first_younger = -1;
             ";; sched_reorder for cycle %d with %d insns in ready list\n",

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
          first_older_only = i;
            fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");

  /* Move first_older_only insn before first_younger.  */
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID (ready[first_older_only]),
             INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
      ready[i] = ready[i + 1];
  ready[i] = first_older_only_insn;
/* Implement TARGET_SCHED_REORDER.  */

arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      /* Do nothing for other cores.  */
  return arm_issue_rate ();
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */

arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)

  if (current_tune->sched_adjust_cost != NULL)
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)

  /* Call insns don't incur a stall, even if they follow a load.  */
  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store; there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
arm_max_conditional_execute (void)
  return max_insns_skipped;

arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  return (optimize > 0) ? 2 : 0;

arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
  return (TARGET_32BIT && speed_p) ? 1
         : arm_default_branch_cost (speed_p, predictable_p);

arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

init_fp_table (void)
  r = REAL_VALUE_ATOF ("0", DFmode);
  fp_consts_inited = true;

/* Return TRUE if rtx X is a valid immediate FP constant.  */

arm_const_double_rtx (rtx x)
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))

  if (real_equal (r, &value_fp0))
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using this
   formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
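/* A worked instance of the encoding above (illustrative, not from the
   original source): 0.5 = 16 * 2^-5, so s = 0, n = 16 and r = 5.  That
   gives ABCDEFGH = 0 : (5 XOR 3 = 110) : (16 - 16 = 0000) = 0x60, which
   is the index vfp3_const_double_index below returns for 0.5.  */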
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */

vfp3_const_double_index (rtx x)
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */

vfp3_const_double_rtx (rtx x)
  return vfp3_const_double_index (x) != -1;
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
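/* For example (illustrative, not from the original source): a vector whose
   32-bit elements all equal 0x0000ab00 matches variant 1 (vmov.i32 with
   abcdefgh = 0xab in byte 1), while elements of 0xffffff3c match variant 6
   (vmvn.i32, since the bitwise inverse ~0xffffff3c is 0x000000c3).  */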
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
                      rtx *modconst, int *elementwidth)
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  for (i = 0; i < idx; i += (STRIDE))		\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

    n_elts = CONST_VECTOR_NUNITS (op);
      if (mode == VOIDmode)

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
         are distinct in this context.  */
      if (!const_vec_duplicate_p (op))

        *modconst = CONST_VECTOR_ELT (op, 0);

      if (el0 == CONST0_RTX (GET_MODE (el0)))

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
          bytes[idx++] = (elpart & 0xff) ^ invmask;
          elpart >>= BITS_PER_UNIT;

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));
  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
                   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
                   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
                   && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
                   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
                   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
                   && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                    && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                    && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
                    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
                    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (1, 8, 16, bytes[i] == bytes[0]);

  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
                    && bytes[i] == bytes[(i + 8) % idx]);
    *elementwidth = elsize;

      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
        for (i = 0; i < idx; i++)
          bytes[i] ^= invmask;

          /* FIXME: Broken on 32-bit H_W_I hosts.  */
          gcc_assert (sizeof (HOST_WIDE_INT) == 8);

          for (i = 0; i < 8; i++)
            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
                   << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);

          unsigned HOST_WIDE_INT imm = 0;

          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

neon_immediate_valid_for_move (rtx op, machine_mode mode,
                               rtx *modconst, int *elementwidth)
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
    *modconst = tmpconst;
    *elementwidth = tmpwidth;
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
                                rtx *modconst, int *elementwidth)
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    *modconst = tmpconst;
    *elementwidth = tmpwidth;
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts,
   because they have different limitations.  */

neon_immediate_valid_for_shift (rtx op, machine_mode mode,
                                rtx *modconst, int *elementwidth,
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
        elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
        gcc_unreachable ();

      if (i != 0 && elpart != last_elt)

  /* Shift less than element size.  */
  maxshift = innersize * 8;

      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)

    *elementwidth = innersize * 8;

    *modconst = CONST_VECTOR_ELT (op, 0);
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
                             int inverse, int quad)
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
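/* As an illustration (not from the original source): with MNEM "vorr", a
   32-bit element width and the quad form, the sprintf above yields the
   template "vorr.i32\t%q0, %2", which the operand-printing machinery then
   turns into assembly along the lines of "vorr.i32 q0, #255".  */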
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
                             machine_mode mode, int quad,
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
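/* Similarly (illustrative, not from the original source): MNEM "vshr" with
   SIGN 's' and a 16-bit element width produces the quad-register template
   "vshr.s16\t%q0, %q1, %2", or the %P form for double registers.  */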
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
                      rtx (*reduc) (rtx, rtx, rtx))
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);

  for (i = parts / 2; i >= 1; i /= 2)
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
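/* To illustrate the halving loop above (not from the original source):
   a four-element vector runs the loop with i = 2 and then i = 1, emitting
   two pairwise operations (e.g. two pairwise-add steps for an integer-add
   reduction), while a two-element vector needs only the final i = 1 step.  */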
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

neon_vdup_constant (rtx vals)
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

neon_make_constant (rtx vals)
  machine_mode mode = GET_MODE (vals);
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);

  if (GET_CODE (vals) == CONST_VECTOR)
  else if (GET_CODE (vals) == PARALLEL)
      /* A CONST_VECTOR must contain only CONST_INTs and
         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
         Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
          rtx x = XVECEXP (vals, 0, i);
          if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
      if (n_const == n_elts)
        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
/* Initialize vector TARGET to VALS.  */

neon_expand_vector_init (rtx target, rtx vals)
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;

  for (i = 0; i < n_elts; ++i)
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))

      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
          emit_move_insn (target, constant);

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
          emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
          emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
          emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
          emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
          emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
          emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
          emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
          emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
          emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
          gcc_unreachable ();

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
              const_tree exp, const char *desc)
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
      error ("%K%s %wd out of range %wd - %wd",
             exp, desc, lane, low, high - 1);
      error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);

/* Bounds-check lanes.  */

neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
  bounds_check (operand, low, high, exp, "lane");

/* Bounds-check constants.  */

neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
  bounds_check (operand, low, high, NULL_TREE, "constant");

neon_element_bits (machine_mode mode)
  return GET_MODE_UNIT_BITSIZE (mode);
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

arm_coproc_mem_operand (rtx op, bool wb)
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */
  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);

  /* Auto-increment addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
          && (GET_CODE (ind) == PRE_INC
              || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)  */

neon_vector_mem_operand (rtx op, int type, bool strict)
  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */
  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))

      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
         than what the instruction format permits.  We have no such constraint
         on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
          < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */

neon_struct_mem_operand (rtx op)
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */
  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
/* Return true if X is a register that will be eliminated later on.  */

arm_eliminable_register (rtx x)
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
                       || REGNO (x) == ARG_POINTER_REGNUM
                       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
                           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
  if (mode == HFmode)
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
        return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
      return GENERAL_REGS;

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
          || VALID_NEON_STRUCT_MODE (mode)))

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))

  return GENERAL_REGS;
/* Values which must be returned in the most-significant end of the return
   register.  */

arm_return_in_msb (const_tree valtype)
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
          && (AGGREGATE_TYPE_P (valtype)
              || TREE_CODE (valtype) == COMPLEX_TYPE
              || FIXED_POINT_TYPE_P (valtype)));
/* Return TRUE if X references a SYMBOL_REF.  */

symbol_mentioned_p (rtx x)
  if (GET_CODE (x) == SYMBOL_REF)

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (symbol_mentioned_p (XVECEXP (x, i, j)))
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
/* Return TRUE if X references a LABEL_REF.  */

label_mentioned_p (rtx x)
  if (GET_CODE (x) == LABEL_REF)

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (label_mentioned_p (XVECEXP (x, i, j)))
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
tls_mentioned_p (rtx x)
  switch (GET_CODE (x))
      return tls_mentioned_p (XEXP (x, 0));
      if (XINT (x, 1) == UNSPEC_TLS)
      /* Fall through.  */
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

arm_cannot_copy_insn_p (rtx_insn *insn)
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
          && (XINT (x, 1) == UNSPEC_PIC_BASE
              || XINT (x, 1) == UNSPEC_PIC_UNIFIED))

  rtx set = single_set (insn);
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
        src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
          && (XINT (src, 1) == VUNSPEC_LL
              || XINT (src, 1) == VUNSPEC_LAX))
minmax_code (rtx x)
  enum rtx_code code = GET_CODE (x);
      gcc_unreachable ();

/* Match pair of min/max operators that can be implemented via usat/ssat.  */

arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
                        int *mask, bool *signed_sat)
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
      *signed_sat = false;

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
      *signed_sat = true;
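/* A concrete reading of the matching rules above (illustrative, not from
   the original source): bounds [0, 255] give log = exact_log2 (256) = 8
   with a zero low bound, matching an 8-bit usat; bounds [-128, 127]
   satisfy -128 == -127 - 1, matching an 8-bit ssat.  */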
/* Return 1 if memory locations are adjacent.  */

adjacent_mem_locations (rtx a, rtx b)
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
          || (GET_CODE (XEXP (b, 0)) == PLUS
              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
      HOST_WIDE_INT val0 = 0, val1 = 0;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
          reg0 = XEXP (XEXP (a, 0), 0);
          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
        reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
          reg1 = XEXP (XEXP (b, 0), 0);
          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
        reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
         instructions to handle, since this would cause the
         arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))

      /* Don't allow an eliminable register: register elimination can make
         the offset too large.  */
      if (arm_eliminable_register (reg0))

      val_diff = val1 - val0;

      /* If the target has load delay slots, then there's no benefit
         to using an ldm instruction unless the offset is zero and
         we are optimizing for size.  */
      return (optimize_size && (REGNO (reg0) == REGNO (reg1))
              && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
              && (val_diff == 4 || val_diff == -4));

  return ((REGNO (reg0) == REGNO (reg1))
          && (val_diff == 4 || val_diff == -4));
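/* For instance (illustrative, not from the original source): MEMs at
   [r3] and [r3, #4] share a base register and have offsets differing by
   exactly 4, so they count as adjacent; [r3] and [r3, #8] do not.  */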
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
   where
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
   2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
   3.  If consecutive is TRUE, then for the kth register being loaded,
       REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
12724 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12725 bool consecutive
, bool return_pc
)
12727 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12728 rtx reg
, mem
, addr
;
12730 unsigned first_regno
;
12731 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12733 bool addr_reg_in_reglist
= false;
12734 bool update
= false;
12739 /* If not in SImode, then registers must be consecutive
12740 (e.g., VLDM instructions for DFmode). */
12741 gcc_assert ((mode
== SImode
) || consecutive
);
12742 /* Setting return_pc for stores is illegal. */
12743 gcc_assert (!return_pc
|| load
);
12745 /* Set up the increments and the regs per val based on the mode. */
12746 reg_increment
= GET_MODE_SIZE (mode
);
12747 regs_per_val
= reg_increment
/ 4;
12748 offset_adj
= return_pc
? 1 : 0;
12751 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12752 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12755 /* Check if this is a write-back. */
12756 elt
= XVECEXP (op
, 0, offset_adj
);
12757 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12763 /* The offset adjustment must be the number of registers being
12764 popped times the size of a single register. */
12765 if (!REG_P (SET_DEST (elt
))
12766 || !REG_P (XEXP (SET_SRC (elt
), 0))
12767 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12768 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12769 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12770 ((count
- 1 - offset_adj
) * reg_increment
))
12774 i
= i
+ offset_adj
;
12775 base
= base
+ offset_adj
;
12776 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12777 success depends on the type: VLDM can do just one reg,
12778 LDM must do at least two. */
12779 if ((count
<= i
) && (mode
== SImode
))
12782 elt
= XVECEXP (op
, 0, i
- 1);
12783 if (GET_CODE (elt
) != SET
)
12788 reg
= SET_DEST (elt
);
12789 mem
= SET_SRC (elt
);
12793 reg
= SET_SRC (elt
);
12794 mem
= SET_DEST (elt
);
12797 if (!REG_P (reg
) || !MEM_P (mem
))
12800 regno
= REGNO (reg
);
12801 first_regno
= regno
;
12802 addr
= XEXP (mem
, 0);
12803 if (GET_CODE (addr
) == PLUS
)
12805 if (!CONST_INT_P (XEXP (addr
, 1)))
12808 offset
= INTVAL (XEXP (addr
, 1));
12809 addr
= XEXP (addr
, 0);
12815 /* Don't allow SP to be loaded unless it is also the base register. It
12816 guarantees that SP is reset correctly when an LDM instruction
12817 is interrupted. Otherwise, we might end up with a corrupt stack. */
12818 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12821 for (; i
< count
; i
++)
12823 elt
= XVECEXP (op
, 0, i
);
12824 if (GET_CODE (elt
) != SET
)
12829 reg
= SET_DEST (elt
);
12830 mem
= SET_SRC (elt
);
12834 reg
= SET_SRC (elt
);
12835 mem
= SET_DEST (elt
);
12839 || GET_MODE (reg
) != mode
12840 || REGNO (reg
) <= regno
12843 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12844 /* Don't allow SP to be loaded unless it is also the base register. It
12845 guarantees that SP is reset correctly when an LDM instruction
12846 is interrupted. Otherwise, we might end up with a corrupt stack. */
12847 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12849 || GET_MODE (mem
) != mode
12850 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12851 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12852 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12853 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12854 offset
+ (i
- base
) * reg_increment
))
12855 && (!REG_P (XEXP (mem
, 0))
12856 || offset
+ (i
- base
) * reg_increment
!= 0)))
12859 regno
= REGNO (reg
);
12860 if (regno
== REGNO (addr
))
12861 addr_reg_in_reglist
= true;
12866 if (update
&& addr_reg_in_reglist
)
12869 /* For Thumb-1, address register is always modified - either by write-back
12870 or by explicit load. If the pattern does not describe an update,
12871 then the address register must be in the list of loaded registers. */
12873 return update
|| addr_reg_in_reglist
;
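/* Illustrative example (hand-written, register numbers are arbitrary):
   a two-register LDMIA with write-back would reach this function as

     (parallel
       [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
	(set (reg:SI r4) (mem:SI (reg:SI r0)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

   The first element is recognized as the base-register update
   (8 == 2 registers * 4 bytes); the remaining SETs must use strictly
   ascending register numbers and offsets stepping by reg_increment.  */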
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1 - 3
	  2		 2 - 6
	  3		 3 - 9
	  4		 4 - 12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
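/* For illustration, using the XScale timings quoted above: a
   4-register ldm blocks for 2 + 4 = 6 cycles, while four separate ldr
   instructions take 4 cycles in the best case and up to 12 when every
   load stalls, but they do not block the pipeline.  For two registers
   the blocking 4-cycle ldm rarely beats two schedulable ldrs, hence
   the cut-off at counts of 3 or more.  */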
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
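/* Worked example (illustrative): for NOPS == 4 and
   unsorted_offsets = {8, 0, 4, 12}, the caller presets order[0] = 1
   (the lowest offset, 0).  The scan then finds offsets 4, 8 and 12
   exactly once each, producing order = {1, 2, 0, 3}.  Offsets
   {0, 4, 4, 8} fail because 4 is found twice, and {0, 8, 12, 16} fail
   because no offset is exactly 4 above 0.  */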
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
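/* For reference (illustrative), the returned LDM_CASE values map onto
   addressing modes as follows:

     1	ldmia	lowest offset == 0
     2	ldmib	lowest offset == 4 (ARM only)
     3	ldmda	highest offset == 0 (ARM only)
     4	ldmdb	highest offset == -4
     5	an add of the lowest offset into a scratch register is needed
	before an ldmia can be used.  */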
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pass.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
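/* Sketch of the result (illustrative): for COUNT == 2, REGS == {4, 5}
   and WBACK_OFFSET == 8, the returned PARALLEL is

     (parallel
       [(set (reg:SI basereg) (plus:SI (reg:SI basereg) (const_int 8)))
	(set (reg:SI r4) MEMS[0])
	(set (reg:SI r5) MEMS[1])])

   which is exactly the shape accepted by ldm_stm_operation_p above.  */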
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
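/* Usage sketch (illustrative): arm_gen_movmemqi below emits
   arm_gen_load_multiple (arm_regs_in_sequence, 4, src, TRUE, srcbase,
   &srcoffset) to pull four words into the first four core registers
   while post-incrementing SRC, and mirrors it with
   arm_gen_store_multiple to write them back out.  */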
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   afterwards.
   Returns true iff we could generate a new instruction.  */
bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];

	    regs[i] = regs[j];
	    regs[j] = t;
	  }
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	{
	  base_reg = regs[0];
	  base_reg_rtx = newbase;
	}
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */
bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
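/* Shape of the emitted loop (illustrative): for LENGTH == 70 and
   BYTES_PER_ITER == 16, leftover == 6, so the loop copies 64 bytes in
   four iterations,

     loop:	<straight-line copy of 16 bytes>
		src_reg += 16; dest_reg += 16;
		if (src_reg != final_src) goto loop;

   and the remaining 6 bytes are handled by the straight-line tail.  */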
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */
static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 redundant.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
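/* Rough summary of the heuristic above (illustrative): when optimizing
   for size, copies longer than 12 bytes use a loop moving 8 bytes per
   iteration if either side is word-aligned (4 otherwise); when
   optimizing for speed, copies longer than 32 bytes use a loop moving
   16 bytes per iteration with an interleave factor of 4, and shorter
   copies are emitted straight-line.  */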
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
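/* Usage sketch (illustrative): if MEM is (mem:SI (reg r1)), the
   returned rtx is (mem:SI (plus (reg r1) (const_int 4))), i.e. the
   SImode word immediately after MEM; gen_movmem_ldrd_strd below steps
   through both buffers this way.  */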
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
	}
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	{
	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
	}

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
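/* Example (illustrative): for (ne (ior (lt x y) (le x y)) (const_int 0))
   we get cond1 = LT and cond2 = LE; LT dominates LE (x < y implies
   x <= y), so the LT case above with cond2 == LE yields CC_DLEmode.
   For a pair like LT and GT, where neither condition dominates the
   other, CCmode is returned and the combined pattern simply fails to
   match.  */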
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_32BIT)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
14711 address mask depending on the physical architecture that the program
14714 arm_gen_return_addr_mask (void)
14716 rtx reg
= gen_reg_rtx (Pmode
);
14718 emit_insn (gen_return_addr_mask (reg
));
14723 arm_reload_in_hi (rtx
*operands
)
14725 rtx ref
= operands
[1];
14727 HOST_WIDE_INT offset
= 0;
14729 if (GET_CODE (ref
) == SUBREG
)
14731 offset
= SUBREG_BYTE (ref
);
14732 ref
= SUBREG_REG (ref
);
14737 /* We have a pseudo which has been spilt onto the stack; there
14738 are two cases here: the first where there is a simple
14739 stack-slot replacement and a second where the stack-slot is
14740 out of range, or is used as a subreg. */
14741 if (reg_equiv_mem (REGNO (ref
)))
14743 ref
= reg_equiv_mem (REGNO (ref
));
14744 base
= find_replacement (&XEXP (ref
, 0));
14747 /* The slot is out of range, or was dressed up in a SUBREG. */
14748 base
= reg_equiv_address (REGNO (ref
));
14750 /* PR 62554: If there is no equivalent memory location then just move
14751 the value as an SImode register move. This happens when the target
14752 architecture variant does not have an HImode register move. */
14755 gcc_assert (REG_P (operands
[0]));
14756 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14757 gen_rtx_SUBREG (SImode
, ref
, 0)));
14762 base
= find_replacement (&XEXP (ref
, 0));
14764 /* Handle the case where the address is too complex to be offset by 1. */
14765 if (GET_CODE (base
) == MINUS
14766 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14768 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14770 emit_set_insn (base_plus
, base
);
14773 else if (GET_CODE (base
) == PLUS
)
14775 /* The addend must be CONST_INT, or we would have dealt with it above. */
14776 HOST_WIDE_INT hi
, lo
;
14778 offset
+= INTVAL (XEXP (base
, 1));
14779 base
= XEXP (base
, 0);
14781 /* Rework the address into a legal sequence of insns. */
14782 /* Valid range for lo is -4095 -> 4095 */
14785 : -((-offset
) & 0xfff));
14787 /* Corner case, if lo is the max offset then we would be out of range
14788 once we have added the additional 1 below, so bump the msb into the
14789 pre-loading insn(s). */
14793 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14794 ^ (HOST_WIDE_INT
) 0x80000000)
14795 - (HOST_WIDE_INT
) 0x80000000);
14797 gcc_assert (hi
+ lo
== offset
);
14801 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14803 /* Get the base address; addsi3 knows how to handle constants
14804 that require more than one insn. */
14805 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14811 /* Operands[2] may overlap operands[0] (though it won't overlap
14812 operands[1]), that's why we asked for a DImode reg -- so we can
14813 use the bit that does not overlap. */
14814 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14815 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14817 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14819 emit_insn (gen_zero_extendqisi2 (scratch
,
14820 gen_rtx_MEM (QImode
,
14821 plus_constant (Pmode
, base
,
14823 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14824 gen_rtx_MEM (QImode
,
14825 plus_constant (Pmode
, base
,
14827 if (!BYTES_BIG_ENDIAN
)
14828 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14829 gen_rtx_IOR (SImode
,
14832 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14836 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14837 gen_rtx_IOR (SImode
,
14838 gen_rtx_ASHIFT (SImode
, scratch
,
14840 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
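/* A worked example of the hi/lo split above: for OFFSET = 0x1234 (4660),
   lo = 4660 & 0xfff = 0x234 and hi = 4660 - 0x234 = 0x1000, so a single
   "add base_plus, base, #0x1000" preload leaves both byte accesses
   ([base_plus, #0x234] and [base_plus, #0x235]) within the 4095 range.
   In the corner case OFFSET = 4095, lo is first reduced to
   4095 & 0x7ff = 0x7ff so that the second access at lo + 1 still fits,
   and the sign-extension arithmetic computing HI preserves
   hi + lo == offset for negative offsets too.  */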
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     OUTVAL.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
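/* For example, on an AAPCS big-endian target a struct { char a, b, c; }
   returned in r0 matches the aggregate test above (size 3 <= 4) and is
   padded upwards, whereas a plain int falls through to the default of
   !BYTES_BIG_ENDIAN.  */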
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;
  else if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
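/* Examples, assuming the ranges used above: in ARM state
   offset_ok_for_ldrd_strd (200) and (-200) hold while (260) does not;
   in Thumb-2 state 1020 is accepted but 1022 fails the multiple-of-4
   test even though it is within range.  */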
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register, the
   offsets are constants within the range, and the gap between the offsets
   is 4.  If reload is complete, also check that the registers are legal.
   WBACK indicates whether the address is updated.  LOAD indicates whether
   the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is four insns long, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fprintf (f, ", ");
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
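/* To make the grouping step concrete: a fix whose pc-relative load can
   reach about 4k forwards (the exact reach comes from the pool_range
   insn attributes, see push_minipool_fix below) and that sits at
   function offset 0x1000 constrains its pool to end before roughly
   0x1000 + 4096 - minipool_pad; the emitted pool must follow a barrier
   whose address satisfies every such constraint in the group.  */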
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in the table.  */
  rtx value;
  /* The mode of the value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT     address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
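/* Thus MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode) are both
   padded up to 4, while MINIPOOL_FIX_SIZE (DImode) is 8, which with
   iWMMXt additionally implies 8-byte alignment (see the fix_size field
   above).  */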
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
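/* For example, a five-entry TBB table (QImode) occupies 5 bytes and is
   rounded up to 6 so the following insn stays halfword aligned; a
   five-entry TBH table (HImode) is 10 bytes and needs no rounding; a
   five-entry word table on Thumb gets 2 extra bytes of alignment
   slack.  */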
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
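/* E.g. a label aligned to 1 << 3 == 8 bytes can be preceded by at most
   8 - 2 == 6 bytes of padding in Thumb code (minimum insn size 2), and
   by at most 8 - 4 == 4 bytes in ARM code.  */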
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address,
	     align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   it.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
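/* As an illustration, the DImode constant 0x000000ff00000001 costs 2:
   both the highpart (0xff) and the lowpart (0x1) are single-insn
   immediates, which is within the limit of 3 or 4 returned by
   arm_max_const_double_inline_cost above, so the value is synthesized
   inline instead of being placed in a literal pool.  */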
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
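/* For example, 0x000000ff00000001 passes this test, since 0xff and 0x1
   are both valid ARM immediates, while 0x0000000012345678 fails because
   0x12345678 cannot be encoded as a rotated 8-bit immediate.  */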
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside
	 a field's size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
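/* A worked example for the RECORD_TYPE path above: for

	struct s { char a; short b; };

   passed in r0, field 'a' occupies bits 0-7 and field 'b' starts at bit
   16, so bits 8-15 are recorded as padding (padding_bits_to_clear[0]
   gets mask 0x0000ff00).  The struct size is exactly one register, so
   last_used_bit wraps to 0 and compute_not_to_clear_mask below then
   drops the speculative bit for the next register, leaving only r0
   marked as not-to-clear.  */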
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)

{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
/* Saves callee saved registers, clears callee saved registers and caller saved
   registers not used to pass arguments before a cmse_nonsecure_call.  And
   restores the callee saved registers after.  */

static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  uint64_t to_clear_mask, float_mask;
	  rtx_insn *seq;
	  rtx pat, call, unspec, reg, cleared_reg, tmp;
	  unsigned int regno, maxregno;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool using_r4, first_param = true;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
	  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Determine the caller-saved registers we need to clear.  */
	  to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
	  maxregno = NUM_ARG_REGS - 1;
	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (TARGET_HARD_FLOAT_ABI)
	    {
	      float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
	      float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
	      to_clear_mask |= float_mask;
	      maxregno = D7_VFP_REGNUM;
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      machine_mode arg_mode = TYPE_MODE (arg_type);

	      if (VOID_TYPE_P (arg_type))
		continue;

	      if (!first_param)
		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
					  true);

	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
					  true);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_mask
		&= ~compute_not_to_clear_mask (arg_type, arg_rtx,
					       REGNO (arg_rtx),
					       padding_bits_to_clear_ptr);

	      first_param = false;
	    }

	  /* Clear padding bits where needed.  */
	  cleared_reg = XEXP (address, 0);
	  reg = gen_rtx_REG (SImode, IP_REGNUM);
	  using_r4 = false;
	  for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
	    {
	      if (padding_bits_to_clear[regno] == 0)
		continue;

	      /* If this is a Thumb-1 target copy the address of the function
		 we are calling from 'r4' into 'ip' such that we can use r4 to
		 clear the unused bits in the arguments.  */
	      if (TARGET_THUMB1 && !using_r4)
		{
		  using_r4 = true;
		  reg = cleared_reg;
		  emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
				  reg);
		}

	      tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
	      emit_move_insn (reg, tmp);
	      /* Also fill the top half of the negated
		 padding_bits_to_clear.  */
	      if (((~padding_bits_to_clear[regno]) >> 16) > 0)
		{
		  tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
		  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
								GEN_INT (16),
								GEN_INT (16)),
					  tmp));
		}

	      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
				     gen_rtx_REG (SImode, regno),
				     reg));
	    }
	  if (using_r4)
	    emit_move_insn (cleared_reg,
			    gen_rtx_REG (SImode, IP_REGNUM));

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (cleared_reg, tmp));
	  tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (cleared_reg, tmp));

	  /* Clearing all registers that leak before doing a non-secure
	     call.  */
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    {
	      if (!(to_clear_mask & (1LL << regno)))
		continue;

	      /* If regno is an even vfp register and its successor is also to
		 be cleared, use vmov.  */
	      if (IS_VFP_REGNUM (regno))
		{
		  if (TARGET_VFP_DOUBLE
		      && VFP_REGNO_OK_FOR_DOUBLE (regno)
		      && to_clear_mask & (1LL << (regno + 1)))
		    emit_move_insn (gen_rtx_REG (DFmode, regno++),
				    CONST0_RTX (DFmode));
		  else
		    emit_move_insn (gen_rtx_REG (SFmode, regno),
				    CONST0_RTX (SFmode));
		}
	      else
		emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
	    }

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}
    }
}

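/* As a sketch of the code emitted above (assuming a soft-float target whose
   call takes one argument in r0 and whose address sits in r4):

	lsrs	r4, r4, #1	@ clear the LSB of the nonsecure address
	lsls	r4, r4, #1
	movs	r1, r4		@ scrub argument registers that would leak
	movs	r2, r4
	movs	r3, r4

   followed by the nonsecure call itself.  The exact set of registers
   cleared depends on the argument list and on the float ABI.  */
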
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}

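/* For instance, a (hypothetical) Thumb-1 sequence

	movs	r3, r2
	...
	cmp	r3, #0
	bne	.L2

   has its move rewritten to "subs r3, r2, #0", which sets the condition
   codes itself and lets a later pass drop the now-redundant compare.  */
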
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}

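/* As an encoding example: outside an IT block "add r0, r1, r2" has only a
   32-bit add.w form, while the flag-setting "adds r0, r1, r2" has a 16-bit
   encoding; when CC_REGNUM is dead the conversion above therefore saves
   two bytes per converted instruction.  */
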
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_clear_caller_saved ();
  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}

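/* To make the distance problem concrete: an ARM-mode pc-relative
   "ldr rN, [pc, #imm]" reaches only around +/-4k bytes (Thumb-1 is far
   more restrictive), so a large function cannot keep every literal in a
   single pool after its code; arm_reorg therefore splinters the pool and
   drops a copy of each constant within range of its users.  */
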
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}

/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset = 0;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}

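/* Example outputs (illustrative): popping {r4, r5} plus the return address
   with SP as the base and UPDATE set gives "pop {r4, r5, pc}"; at the same
   point in an IRQ handler the POP form is unavailable, so this emits
   "ldmfd sp!, {r4, r5, pc}^", the trailing '^' requesting the exception
   return that also restores CPSR from SPSR.  */
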
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p =  REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}

/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					  (Pmode, stack_pointer_rtx,
					   - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}

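/* Note the effect of the erratum workaround above: on a pre-v6 core a
   request such as vfp_emit_fstmd (base, 2) actually pushes three register
   pairs and returns 24, so callers must size the frame from the returned
   byte count rather than assuming count * 8 themselves.  */
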
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}

/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}

/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}

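/* On a pre-ARMv5 target the code above turns an indirect call through,
   say, r2 into:

	mov	lr, pc		@ return address is the following insn
	bx	r2		@ or "mov pc, r2" without interworking

   since those architectures have no blx <register> instruction.  */
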
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}

/* Emit a MOVW/MOVT pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}

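/* For example, loading the constant 0x12345678 through this routine
   expands to a movw/movt pair:

	movw	rD, #0x5678	@ set the low half-word
	movt	rD, #0x1234	@ the zero_extract writes the high half

   while a value whose top half is zero needs only the movw.  */
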
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoicrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditional execution.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (TARGET_LDRD)
		return "ldrd%?\t%0, [%1]";

	      return "ldmia%?\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("strd%?\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%?\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}

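/* Illustrative outputs: with TARGET_LDRD a register-indirect DImode load
   prints as "ldrd r0, [r2]" while older cores use "ldmia r2, {r0, r1}";
   the awkward cases (overlapping registers, out-of-range offsets) fall
   back to two single-word loads or stores and report *count == 2 so that
   insn length computation stays correct.  */
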
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldmia%?\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%?\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}

/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
	      || mode == SFmode
	      || mode == DFmode
	      || mode == HImode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : sp ? "32" : "16",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}

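/* Sample expansions of the templates above: a DFmode load from a plain
   register address becomes "vldr.64 d1, [r0]", and an SFmode store with a
   POST_INC address becomes "vstmia.32 r0!, {s2}"; integer-mode accesses
   get a trailing "@ int" comment in the assembly output.  */
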
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM, as required by the EABI.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}

/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}

/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}

/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}

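/* Worked example: 0x10004 is not a valid rotated 8-bit immediate, so an
   add of that constant is split into the two chunks found by the loop
   above:

	add	r0, r1, #4
	add	r0, r0, #65536

   INSTR1 prints the first chunk, INSTR2 the remainder.  */
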
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}

/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}

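/* For instance, a multiplication by 8 arriving here as
   (mult (reg) (const_int 8)) is printed with mnemonic "lsl" and *amountp
   set to 3, letting one recognizer cover both shifts and power-of-two
   multiplies.  */
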
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}

/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (df_regs_ever_live_p (reg)
	    || (! crtl->is_leaf && call_used_regs[reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}

/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
	  || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	      && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}

/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}

/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}

/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1)
		  || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}

/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && !IS_CMSE_ENTRY (func_type)
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |=   (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5 && TARGET_ARM)
                sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                }
            }
          /* For interrupt returns we have to use an LDM rather than
             a POP so that we can use the exception return variant.  */
          else if (IS_INTERRUPT (func_type))
            sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
          else
            sprintf (instr, "pop%s\t{", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          gcc_assert (arm_arch5 || arm_arch4t);
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          if (IS_CMSE_ENTRY (func_type))
            {
              /* Check if we have to clear the 'GE bits' which is only used if
                 parallel add and subtraction instructions are available.  */
              if (TARGET_INT_SIMD)
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
              else
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvq, %%|lr", conditional);

              output_asm_insn (instr, & operand);
              if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
                {
                  /* Clear the cumulative exception-status bits (0-4,7) and the
                     condition code bits (28-31) of the FPSCR.  We need to
                     remember to clear the first scratch register used (IP) and
                     save and restore the second (r4).  */
                  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
                  output_asm_insn (instr, & operand);
                }
              snprintf (instr, sizeof (instr), "bxns\t%%|lr");
            }
          /* Use bx if it's available.  */
          else if (arm_arch5 || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}

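/* As an illustration of the strings assembled above (not verbatim
   compiler output): a normal ARMv5T function that saved {r4, r5, lr}
   and has no special exit requirements restores straight into the PC
   with

        pop     {r4, r5, pc}

   whereas an ARM_FT_INTERWORKED function restores LR and returns with

        pop     {r4, r5, lr}
        bx      lr

   (conditional suffixes from CONDITIONAL omitted).  */
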
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
                                    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}

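/* As an illustration, for a function "foo" carrying the
   cmse_nonsecure_entry attribute this emits something along the lines
   of (modulo the exact directives produced by the target hooks; this
   sketch is not verbatim compiler output):

        .global __acle_se_foo
        .type   __acle_se_foo, %function
        .type   foo, %function
     foo:
     __acle_se_foo:

   so that the linker can create a secure gateway veneer targeting
   __acle_se_foo.  */
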
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is ((pc[-3]) & 0x00ffffff).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}

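/* A sketch of the consumer side of this marker, under the layout above
   (illustrative only; not part of GCC).  Given 'pc' holding the saved
   'pc' value as an (unsigned long *):

     static const char *
     embedded_function_name (const unsigned long *pc)
     {
       unsigned long marker = pc[-3];           // the .word at pc - 12
       if ((marker & 0xff000000) != 0xff000000)
         return 0;                              // no name present
       unsigned long alignlength = marker & 0x00ffffff;
       return (const char *) pc - 12 - alignlength;
     }
*/
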
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
               crtl->args.size,
               crtl->args.pretend_args_size, frame_size);
  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}

static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}

/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        rtx insn;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            /* The first pair also allocates the stack space for all
               saved registers, using writeback.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            /* Subsequent pairs use offset addressing.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);
        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return;
}

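/* As an illustration (not verbatim compiler output): pushing
   {r4, r5, r6, r7, r8} - five registers, an odd count - yields

        str     r4, [sp, #-20]!
        strd    r5, r6, [sp, #4]
        strd    r7, r8, [sp, #12]

   so every STRD stays doubleword-aligned provided SP was 64-bit aligned
   on entry.  */
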
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   in scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}

/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}

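/* A typical use (see arm_expand_prologue below) pushes the saved core
   registers and annotates the same set for the unwinder, e.g.
   (illustrative only):

     insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
     RTX_FRAME_RELATED_P (insn) = 1;

   A smaller DWARF_REGS_MASK is passed when some of the pushed slots
   should not be described to the unwinder.  */
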
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}

/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}

/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}

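/* As an illustration of the recursion above: a request to restore 20
   D-registers is emitted as one pop_multi of 16 followed by one of the
   remaining 4, since a single pop_multi can describe at most 16
   D-registers.  */
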
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers are being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers are
   popped, the last register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the
               registers to be loaded are generated in above given LDRD
               pattern, and the pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first
     and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}

/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It
   uses offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped,
               and we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);
            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}

/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}

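/* For example, a function returning a 64-bit integer has a DImode
   return rtx, so this returns 8 (the value occupies r0 and r1); the
   frame-padding logic in arm_get_frame_offsets below treats r3 as
   reusable only when this value is at most 12, i.e. when the return
   value fits in r0-r2.  */
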
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!crtl->is_leaf
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}

/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}

/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx_insn *call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }

  return false;
}

/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */

/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}

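/* A worked example (illustrative): an ARM function with no pretend
   args, no static chain, {r4, r5, fp, lr} saved, a 24-byte local frame
   and 8 bytes of outgoing arguments gets

     saved_args    = 0
     saved_regs    = 0 + 0 + 16 = 16
     soft_frame    = 16   (CALLER_INTERWORKING_SLOT_SIZE == 0)
     locals_base   = 16 + 24 = 40
     outgoing_args = 40 + 8 = 48

   and every boundary is already doubleword aligned, so no padding
   register or extra word is added.  */
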
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}

/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}

/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
                               unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
        if (regno1 != i && (live_regs & (1 << i)) != 0)
          {
            regno = i;
            break;
          }

      if (regno < 0)
        {
          /* If IP is used as the 1st scratch register for a nested function,
             then either r3 wasn't available or is used to preserve IP.  */
          if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
            regno1 = 3;
          regno = (regno1 == 3 ? 2 : 3);
          sr->saved
            = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                               regno);
        }
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                            unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (reg1);
        }

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
        }
      else
        emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));

      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          HOST_WIDE_INT rem = size - rounded_size;

          if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
            {
              emit_set_insn (sr.reg,
                             plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
              emit_stack_probe (plus_constant (Pmode, sr.reg,
                                               PROBE_INTERVAL - rem));
            }
          else
            emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
        }

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

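/* A worked example (illustrative), with PROBE_INTERVAL == 4096,
   FIRST == 0 and SIZE == 10000: the middle branch above applies
   (SIZE <= 5 * PROBE_INTERVAL), probing at offsets 4096 and 8192 below
   the incoming SP and leaving a residual of 10000 - 8192 == 1808.  On
   ARM that residual fits the 4095-byte offset range, so the final probe
   lands at offset 10000 directly; on Thumb-2, where the limit is 255,
   the scratch register is first stepped down one more interval.  */
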
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}

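/* The emitted loop looks like this (illustrative; the label follows the
   LPSRL pattern above and r4/r5 stand for whatever registers TEST_ADDR
   and LAST_ADDR live in):

     .LPSRL0:
        sub     r4, r4, #4096
        str     r0, [r4, #0]
        cmp     r4, r5
        bne     .LPSRL0
*/
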
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which registers we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		   || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
		       && !df_regs_ever_live_p (LR_REGNUM)
		       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
	  saved_regs += 4;

	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	}
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
		 ((0xf0 >> (args_to_push / 4)) & 0xf,
		  (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
					size - STACK_CHECK_PROTECT,
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
				    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
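/* A worked example for two of the codes above (illustrative, not from the
   original comment): for a DImode value in r0, '%M' prints "{r0-r1}", and
   for the CONST_INT 5, '%B' prints -6, the sign-extended bitwise inverse.  */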
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc ('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc ('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' operands are also available for 64-bit
	 constants.  */
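      /* A concrete example (illustrative, not from the original comment):
	 for a little-endian DImode value held in {r0, r1}, '%Q' prints r0
	 (the least significant word), '%R' prints r1 (the most significant
	 word), and '%H' also prints r1, the higher register number.  */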
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */
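    /* For example (illustrative only): a bits value of 5 (binary 101)
       encodes a signed type with rounding, so '%T' prints 's', '%F'
       prints 'i', '%t' prints 's' and '%O' prints "r".  */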
    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP (XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs ("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;
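    /* As an example (illustrative only): a 16-byte post-incremented access
       known to be 128-bit aligned is printed as "[r4:128]!"; the base
       register is of course operand-dependent.  */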
    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element
       index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
	int result;

	gcc_assert (CONST_DOUBLE_P (x));
	result = vfp3_const_double_for_fract_bits (x);
	if (result == 0)
	  result = vfp3_const_double_for_bits (x);
	fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];

	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_real
	      (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
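/* For instance (illustrative only): when emitting a word-sized address in a
   PIC constant table, a local, text-relative symbol comes out as

	.word	local_sym(GOTOFF)

   while a non-local symbol gets the (GOT) suffix instead; "local_sym" is a
   made-up name.  */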
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
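/* An illustrative example (not from the original comment): a conditional
   branch that skips a single insn, such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is turned by this fsm into

	cmp	r0, #0
	addne	r1, r1, #1

   with both the branch and the label deleted.  */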
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
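/* For instance (illustrative only): two insns conditional on EQ followed by
   one conditional on NE can be emitted under a single IT block as

	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #2
	movne	r0, #0

   with arm_condexec_masklen == 3 and arm_condexec_mask encoding the
   then/else pattern.  */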
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	case NE: return ARM_CS;
	case EQ: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		this_insn = next_nonnote_insn (this_insn);

	      gcc_assert (!this_insn
			  || (!BARRIER_P (this_insn)
			      && !LABEL_P (this_insn)));

	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      /* VFP registers can hold HImode values.  */
      if (mode == HImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2)
	return true;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */

int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
	     )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
23442 arm_promoted_type (const_tree t
)
23444 if (SCALAR_FLOAT_TYPE_P (t
)
23445 && TYPE_PRECISION (t
) == 16
23446 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23447 return float_type_node
;
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static machine_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
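/* Illustrative sketch (not part of the original file): the
   forward/backward ordering above is the same trick memmove uses for
   overlapping buffers.  A stand-alone scalar analogue, with a
   hypothetical name:  */
#if 0
static void
example_overlap_safe_copy (int *dest, const int *src, unsigned int count)
{
  unsigned int i;
  if (dest <= src)
    for (i = 0; i < count; i++)     /* copy low-to-high; src read first */
      dest[i] = src[i];
  else
    for (i = count; i-- > 0; )      /* copy high-to-low; avoids clobber */
      dest[i] = src[i];
}
#endif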
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
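/* Illustrative sketch (not part of the original file): ctz_hwi counts
   trailing zero bits, so the index of the lowest set bit falls out
   directly; e.g. for the register mask 0x28 (r3 and r5 set) the result
   is 3.  A stand-alone equivalent using the GCC builtin, with a
   hypothetical name:  */
#if 0
static int
example_first_bit (unsigned mask)
{
  return mask ? __builtin_ctz (mask) : -1;	/* 0x28 -> 3 */
}
#endif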
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
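/* Illustrative sketch (not part of the original file): both loops above
   walk the set bits of a mask from least to most significant;
   mask &= mask - 1 clears the lowest set bit each iteration, so the
   loop body runs exactly once per saved register.  Stand-alone, with a
   hypothetical name:  */
#if 0
static int
example_count_saved_regs (unsigned long mask)
{
  int i;
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      unsigned regno = __builtin_ctzl (mask);	/* register handled now */
      (void) regno;
    }
  return i;	/* e.g. 0x40ff (r0-r7 and lr) -> 9 registers */
}
#endif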
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
		       reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* We can deduce the registers used from the function's return value.
	 This is more reliable than examining df_regs_ever_live_p ()
	 because that will be set if the register is ever used in the
	 function, not just if the register is used to hold a return
	 value.  */
      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    --pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in 'cmse_nonsecure_entry_clear_before_return'.  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
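/* Illustrative sketch (not part of the original file): the matching loop
   above pairs registers to pop with registers to pop them into by
   clearing the lowest set bit of each mask in lockstep; x & -x isolates
   the lowest set bit.  Stand-alone, with hypothetical names:  */
#if 0
static int
example_match_pops (unsigned available, unsigned required, int pops_needed)
{
  /* Each iteration consumes one available register and one required
     pop, so pops_needed ends up as the number of pops left unmatched.  */
  for (; required != 0 && available != 0;
       available &= ~(available & -available),
       required &= ~(required & -required))
    --pops_needed;
  return pops_needed;
}
#endif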
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}

      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
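/* Illustrative arithmetic (not part of the original file): the test
   above accepts exactly the 32-bit values whose set bits fit in one
   8-bit window, i.e. VAL == N << I for some 8-bit N and 0 <= I < 25.
   For example 0xff000000 = 0xff << 24 passes, while 0x00010080 spans
   more than eight bits and fails.  */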
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a
     far jump may finally be used.  The heuristic is very conservative as
     there is no chance to roll back the decision not to use far jumps.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
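/* Illustrative arithmetic (not part of the original file): the threshold
   above covers the worst case where every 2-byte insn drags in a 4-byte
   constant-pool entry, so total reachable size can be up to
   func_size * 3.  Far jumps are therefore assumed once
   func_size * 3 >= 2048, i.e. func_size >= 683 bytes.  */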
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
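/* Illustrative arithmetic (not part of the original file): with a
   512-byte frame (the only size accepted when optimizing for speed) and
   one usable free register, amount >= 512 and amount - n_free * 4 = 508
   fits the Thumb-1 "sub sp, #imm" range (immediates up to 508), so the
   function returns (512 - 508) / 4 = 1: one extra push stands in for
   the 4 bytes the immediate cannot cover.  */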
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because
     that will be set if the register is ever used in the function, not
     just if the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Push low registers.
	 4     add   R7, SP, #20     Get the stack pointer before the push.
	 6     str   R7, [SP, #8]    Store the stack pointer
					(before reserving the space).
	 8     mov   R7, PC          Get hold of the start of this code + 12.
	10     str   R7, [SP, #16]   Store it.
	12     mov   R7, FP          Get hold of the current frame pointer.
	14     str   R7, [SP, #4]    Store it.
	16     mov   R7, LR          Get hold of the current return address.
	18     str   R7, [SP, #12]   Store it.
	20     add   R7, SP, #16     Point at the start of the
					backtrace structure.
	22     mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  lr_needs_saving = false;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers.  Such kind of stash may clobber the
	 use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      if (lr_needs_saving)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
    sorry ("-fstack-check=specific for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  uint64_t to_clear_mask[2];
  uint32_t padding_bits_to_clear = 0;
  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
  int regno, maxregno = IP_REGNUM;
  tree result_type;
  rtx result_rtl;

  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
  to_clear_mask[0] |= (1ULL << IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
     to make sure the instructions used to clear them are present.  */
  if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
    {
      uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
      maxregno = LAST_VFP_REGNUM;

      float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
      to_clear_mask[0] |= float_mask;

      float_mask = (1ULL << (maxregno - 63)) - 1;
      to_clear_mask[1] = float_mask;

      /* Make sure we don't clear the two scratch registers used to clear the
	 relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      to_clear_mask[0] &= ~(1ULL << 4);
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (call_used_regs[regno])
	to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      to_clear_mask[0]
	&= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
				       padding_bits_to_clear_ptr);
    }

  if (padding_bits_to_clear != 0)
    {
      rtx reg_rtx;
      /* Padding bits to clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too, we will use r1 as a scratch register.  */
      gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);

      reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);

      /* Fill the lower half of the negated padding_bits_to_clear.  */
      emit_move_insn (reg_rtx,
		      GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));

      /* Also fill the top half of the negated padding_bits_to_clear.  */
      if (((~padding_bits_to_clear) >> 16) > 0)
	emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
						      GEN_INT (16),
						      GEN_INT (16)),
				GEN_INT ((~padding_bits_to_clear) >> 16)));

      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
			     gen_rtx_REG (SImode, R0_REGNUM),
			     reg_rtx));
    }

  for (regno = R0_REGNUM; regno <= maxregno; regno++)
    {
      if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
	continue;

      if (IS_VFP_REGNUM (regno))
	{
	  /* If regno is an even vfp register and its successor is also to
	     be cleared, use vmov.  */
	  if (TARGET_VFP_DOUBLE
	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
	      && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
	    {
	      emit_move_insn (gen_rtx_REG (DFmode, regno),
			      CONST1_RTX (DFmode));
	      emit_use (gen_rtx_REG (DFmode, regno));
	      regno++;
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SFmode, regno),
			      CONST1_RTX (SFmode));
	      emit_use (gen_rtx_REG (SFmode, regno));
	    }
	}
      else
	{
	  if (TARGET_THUMB1)
	    {
	      if (regno == R0_REGNUM)
		emit_move_insn (gen_rtx_REG (SImode, regno),
				const0_rtx);
	      else
		/* R0 has either been cleared before, see code above, or it
		   holds a return value, either way it is not secret
		   information.  */
		emit_move_insn (gen_rtx_REG (SImode, regno),
				gen_rtx_REG (SImode, R0_REGNUM));
	      emit_use (gen_rtx_REG (SImode, regno));
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SImode, regno),
			      gen_rtx_REG (SImode, LR_REGNUM));
	      emit_use (gen_rtx_REG (SImode, regno));
	    }
	}
    }
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~ (1 << LR_REGNUM);
	  saved_regs_mask |=  (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode, hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
25319 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25320 function is not a sibcall. */
25322 arm_expand_epilogue (bool really_return
)
25324 unsigned long func_type
;
25325 unsigned long saved_regs_mask
;
25329 arm_stack_offsets
*offsets
;
25331 func_type
= arm_current_func_type ();
25333 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25334 let output_return_instruction take care of instruction emission if any. */
25335 if (IS_NAKED (func_type
)
25336 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25339 emit_jump_insn (simple_return_rtx
);
25343 /* If we are throwing an exception, then we really must be doing a
25344 return, so we can't tail-call. */
25345 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
        {
          /* In ARM mode, frame pointer points to first saved register.
             Restore stack pointer to last saved register.  */
          amount = offsets->frame - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (insn, amount,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);

          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
      else
        {
          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
          amount = offsets->locals_base - offsets->saved_regs;
          if (amount)
            {
              insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                            hard_frame_pointer_rtx,
                                            GEN_INT (amount)));
              arm_add_cfa_adjust_cfa_note (insn, amount,
                                           hard_frame_pointer_rtx,
                                           hard_frame_pointer_rtx);
            }

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_movsi (stack_pointer_rtx,
                                       hard_frame_pointer_rtx));
          arm_add_cfa_adjust_cfa_note (insn, 0,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
         last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
        {
          rtx_insn *tmp;
          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx,
                                       GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (tmp, amount,
                                       stack_pointer_rtx, stack_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
         any groupings made in the prologue and generate matching
         vldm operations.  The need to match groups is because,
         unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
          {
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
            if (end_reg > i + 2)
              arm_emit_vfp_multi_reg_pop (i + 2,
                                          (end_reg - (i + 2)) / 2,
                                          stack_pointer_rtx);
            end_reg = i;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (end_reg > i + 2)
        arm_emit_vfp_multi_reg_pop (i + 2,
                                    (end_reg - (i + 2)) / 2,
                                    stack_pointer_rtx);
    }

  if (TARGET_REALLY_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
        {
          rtx_insn *insn;
          rtx addr = gen_rtx_MEM (V2SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                             gen_rtx_REG (V2SImode, i),
                                             NULL_RTX);
          arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_CMSE_ENTRY (func_type)
          && !IS_STACKALIGN (func_type)
          && really_return
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return)
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          return_in_pc = true;
        }

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
        {
          for (i = 0; i <= LAST_ARM_REGNUM; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx addr = gen_rtx_MEM (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx));
                set_mem_alias_set (addr, get_frame_alias_set ());

                if (i == PC_REGNUM)
                  {
                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
                    XVECEXP (insn, 0, 0) = ret_rtx;
                    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
                                                        addr);
                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
                    insn = emit_jump_insn (insn);
                  }
                else
                  {
                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                                                 addr));
                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                       gen_rtx_REG (SImode, i),
                                                       NULL_RTX);
                    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                                 stack_pointer_rtx,
                                                 stack_pointer_rtx);
                  }
              }
        }
      else
        {
          if (TARGET_LDRD
              && current_tune->prefer_ldrd_strd
              && !optimize_function_for_size_p (cfun))
            {
              if (TARGET_THUMB2)
                thumb2_emit_ldrd_pop (saved_regs_mask);
              else if (TARGET_ARM && !IS_INTERRUPT (func_type))
                arm_emit_ldrd_pop (saved_regs_mask);
              else
                arm_emit_multi_reg_pop (saved_regs_mask);
            }
          else
            arm_emit_multi_reg_pop (saved_regs_mask);
        }

      if (return_in_pc)
        really_return = false;
    }

  amount = crtl->args.pretend_args_size
           + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
        = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                 stack_pointer_rtx,
                                 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
        {
          /* Restore pretend args.  Refer arm_expand_prologue on how to save
             pretend_args in stack.  */
          int num_regs = crtl->args.pretend_args_size / 4;
          saved_regs_mask = (0xf0 >> num_regs) & 0xf;
          for (j = 0, i = 0; j < num_regs; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx reg = gen_rtx_REG (SImode, i);
                dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
                j++;
              }
          REG_NOTES (tmp) = dwarf;
        }
      arm_add_cfa_adjust_cfa_note (tmp, amount,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
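/* Illustrative sketch (editorial addition, not part of GCC): the VFP pop
   scan above walks the D registers from the top down and flushes a group
   each time it finds a register pair that needs no restore, mirroring the
   prologue's grouping because vldm can only address consecutive registers.
   With a hypothetical liveness predicate needs_restore() and emitter
   emit_vldm(), the same grouping logic in isolation is:

     int end_reg = LAST_VFP_REGNUM + 1;
     int i;
     for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
       if (!needs_restore (i) && !needs_restore (i + 1))
         {
           if (end_reg > i + 2)
             emit_vldm (i + 2, (end_reg - (i + 2)) / 2);  // one vldm group
           end_reg = i;
         }
     if (end_reg > i + 2)
       emit_vldm (i + 2, (end_reg - (i + 2)) / 2);        // final group
*/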
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */

const char *
thumb1_output_interwork (void)
{
  const char * name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
              == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
        {
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
        std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
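/* Illustrative note (editorial addition): the three conditional swaps in
   the n == 3 case form a small sorting network, so the scratch registers
   are always emitted in ascending order as ldmia/stmia require.  For
   example, with hypothetical operands %4 = r5, %5 = r2, %6 = r4:

     swap(%4,%5) -> r2, r5, r4
     swap(%5,%6) -> r2, r4, r5
     swap(%4,%5) -> r2, r4, r5   // already ordered; no change

   giving "ldmia r1!, {r2, r4, r5}" / "stmia r0!, {r2, r4, r5}".  */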
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
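/* Worked example (editorial addition): thumb_expand_movmemqi peels the
   copy greedily.  For len = 27 it emits two 12-byte ldmia/stmia blocks
   (the movmem12b pattern also advances the pointer registers), leaving
   len = 3; no 8- or 4-byte step applies, so it finishes with one halfword
   move at offset 0 and one byte move at offset 2:

     27 = 12 + 12 + 2 + 1
*/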
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
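/* Example (editorial addition, prefix behaviour assumed from
   ARM_NAME_ENCODING_LENGTHS): a label such as "*_foo" has its '*' prefix
   stripped and is then printed verbatim as "_foo" by
   arm_asm_output_labelref, while a plain "foo" goes through "%U%s" and so
   receives the configured user-label prefix.  */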
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
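/* Example (editorial addition): with -fverbose-asm in effect, a call such
   as

     arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);

   produces an output line of the form

     .eabi_attribute 28, 1   @ Tag_ABI_VFP_args

   where the comment marker depends on ASM_COMMENT_START.  */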
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
               current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
               current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
               current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
               current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
               current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
               (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
               current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
               current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
               current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
               current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
               (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
               (int) current_tune->logical_op_non_short_circuit_thumb,
               (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
               (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
               "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
               (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
               (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
               current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
               current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
               (int) current_tune->sched_autopref);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
         generate the tags.

         Note: it might be better to do this unconditionally, then the
         assembler would not need to know about all new CPU names as
         they are added.  */
      if (!arm_active_target.core_name)
        {
          /* armv7ve doesn't support any extensions.  */
          if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
            {
              /* Keep backward compatibility for assemblers
                 which don't support armv7ve.  */
              asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
              asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
              asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
              asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
              asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
            }
          else
            {
              const char* pos = strchr (arm_active_target.arch_name, '+');
              if (pos)
                {
                  char buf[32];
                  gcc_assert (strlen (arm_active_target.arch_name)
                              <= sizeof (buf) / sizeof (*pos));
                  strncpy (buf, arm_active_target.arch_name,
                           (pos - arm_active_target.arch_name) * sizeof (*pos));
                  buf[pos - arm_active_target.arch_name] = '\0';
                  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
                  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
                }
              else
                asm_fprintf (asm_out_file, "\t.arch %s\n",
                             arm_active_target.arch_name);
            }
        }
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
        asm_fprintf (asm_out_file, "\t.arch %s\n",
                     arm_active_target.core_name + 8);
      else
        {
          const char* truncated_name
            = arm_rewrite_selected_cpu (arm_active_target.core_name);
          asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
        }

      if (print_tune_info)
        arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
        {
          if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
            arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

          if (TARGET_HARD_FLOAT_ABI)
            arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
        }

      /* Some of these attributes only apply when the corresponding features
         are used.  However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
        arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
        {
          arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
          arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
        }
      if (flag_signaling_nans)
        arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
                               flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
                               flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
                               unaligned_access);

      if (arm_fp16_format)
        arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
                                 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
        arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
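/* Example (editorial addition, architecture string hypothetical): when the
   arch name contains '+', the code above splits it so older assemblers can
   digest it, e.g. "armv7-a+mp" would be emitted as

     .arch armv7-a
     .arch_extension mp
*/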
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
                    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);
          fputs ("\tldr\tr3, ", file);
        }
      else
        {
          fputs ("\tldr\tr12, ", file);
        }
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          fputs ("\tldr\tr3, ", file);
          assemble_name (file, label);
          fputs ("+4\n", file);
          asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          /* Thumb1 unified syntax requires s suffix in instruction name when
             one of the operands is immediate.  */
          asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
        {
          /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
          rtx tem = XEXP (DECL_RTL (function), 0);
          /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
             pipeline offset is four rather than eight.  Adjust the offset
             accordingly.  */
          tem = plus_constant (GET_MODE (tem), tem,
                               TARGET_THUMB1_ONLY ? -3 : -7);
          tem = gen_rtx_MINUS (GET_MODE (tem),
                               tem,
                               gen_rtx_SYMBOL_REF (Pmode,
                                                   ggc_strdup (labelpc)));
          assemble_integer (tem, 4, BITS_PER_WORD, 1);
        }
      else
        /* Output ".word .LTHUNKn".  */
        assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
        assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
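/* Worked example (editorial addition): the PIC arithmetic in the comment
   above cancels the label address.  With hypothetical values
   target = 0x8000 and .LTHUNKPCn = 0x7f40, the constant-pool word holds
   (0x8000 - 7) - 0x7f40 = 0xb9, and at run time

     r12 = 0xb9 + (0x7f40 + 8) = 0x8001 = target + 1,

   i.e. the target address with the Thumb bit forced on.  */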
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                       HOST_WIDE_INT vcall_offset, tree function)
{
  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.
  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
                                       function)
                    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  arm_split_constant (PLUS, Pmode, NULL_RTX,
                      delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
                          false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
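/* Example (editorial addition): elements are printed from the highest
   index down, so a V4HImode constant {1, 2, 3, 4} comes out with the
   "%04x" pattern as

     0x0004000300020001
*/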
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
                            machine_mode mode,
                            tree type,
                            int *pretend_size,
                            int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
        nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
        addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
        {
          /* LR will be the first saved register.  */
          delta = offsets->outgoing_args - (offsets->frame + 4);

          if (delta >= 4096)
            {
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
                                     GEN_INT (delta & ~4095)));
              addr = scratch;
              delta &= 4095;
            }
          else
            addr = stack_pointer_rtx;

          addr = plus_constant (Pmode, addr, delta);
        }

      /* The store needs to be marked as frame related in order to prevent
         DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
          if (TARGET_THUMB1)
            limit = 128;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
         DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
                            unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
        if (!TARGET_NEON_VECTORIZE_DOUBLE)
          return V2DImode;
        break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
        return V2SImode;
      case HImode:
        return V4HImode;
      case QImode:
        return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}
/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
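/* Example (editorial addition): because SImode shift counts are taken
   modulo 256 by the hardware, an expression such as

     x << (n & 255)

   may be simplified by the middle-end to "x << n"; for DImode the mask is
   0, so no such truncation may be assumed.  */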
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
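/* Example (editorial addition, register layout per FIRST_VFP_REGNUM):
   r0-r15 map to DWARF 0-15 unchanged; s5 (regno FIRST_VFP_REGNUM + 5)
   maps to 64 + 5 = 69 in the legacy encoding, and d16
   (regno FIRST_VFP_REGNUM + 32) maps to 256 + 32 / 2 = 272.  */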
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
        if (TARGET_BIG_END)
          {
            parts[i] = gen_rtx_REG (SImode, regno + i + 1);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i);
          }
        else
          {
            parts[i] = gen_rtx_REG (SImode, regno + i);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
          }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
        parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
              && REG_P (SET_DEST (e))
              && REGNO (SET_DEST (e)) == SP_REGNUM
              && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
         avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
        padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
        fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
                  && MEM_P (SET_DEST (e))
                  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
        {
          /* Check that the addresses are consecutive.  */
          e = XEXP (SET_DEST (e), 0);
          if (GET_CODE (e) == PLUS)
            gcc_assert (REG_P (XEXP (e, 0))
                        && REGNO (XEXP (e, 0)) == SP_REGNUM
                        && CONST_INT_P (XEXP (e, 1))
                        && offset == INTVAL (XEXP (e, 1)));
          else
            gcc_assert (i == 1
                        && REG_P (e)
                        && REGNO (e) == SP_REGNUM);
          offset += reg_size;
        }
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || !REG_P (XEXP (XEXP (e0, 0), 0))
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf(asm_out_file, "{d%d}\n",
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || !REG_P (XEXP (e1, 0))
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || !CONST_INT_P (XEXP (e1, 1)))
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (!REG_P (XEXP (e1, 0))
                  || !CONST_INT_P (XEXP (e1, 1)))
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (REG_P (e1))
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && REG_P (XEXP (e1, 0))
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && CONST_INT_P (XEXP (e1, 1)))
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
        {
        case REG_FRAME_RELATED_EXPR:
          pat = XEXP (note, 0);
          goto found;

        case REG_CFA_REGISTER:
          pat = XEXP (note, 0);
          if (pat == NULL)
            {
              pat = PATTERN (insn);
              if (GET_CODE (pat) == PARALLEL)
                pat = XVECEXP (pat, 0, 0);
            }

          /* Only emitted for IS_STACKALIGN re-alignment.  */
          {
            rtx dest, src;
            unsigned reg;

            src = SET_SRC (pat);
            dest = SET_DEST (pat);

            gcc_assert (src == stack_pointer_rtx);
            reg = REGNO (dest);
            asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                         reg + 0x90, reg);
          }
          handled_one = true;
          break;

        /* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
           to get correct dwarf information for shrink-wrap.  We should not
           emit unwind information for it because these are used either for
           pretend arguments or notes to adjust sp and restore registers from
           stack.  */
        case REG_CFA_DEF_CFA:
        case REG_CFA_ADJUST_CFA:
        case REG_CFA_RESTORE:
          return;

        case REG_CFA_EXPRESSION:
        case REG_CFA_OFFSET:
          /* ??? Only handling here what we actually emit.  */
          gcc_unreachable ();

        default:
          break;
        }
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
        operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
        {
          sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
          output_asm_insn (templ, operands);
          if (opmode == DImode)
            {
              sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
              output_asm_insn (templ, operands);
            }
        }
      else
        {
          /* The destination register will contain all zeros.  */
          sprintf (templ, "wzero\t%%0");
          output_asm_insn (templ, operands);
        }
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
        {
          break;
        }
      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case V8QImode:
        sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
        break;
      case V4HImode:
        sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
        break;
      case V2SImode:
        sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
        break;
      default:
        gcc_unreachable ();
        break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
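/* Example (editorial addition): the single-bit mask selects the lane to
   insert into.  For a V4HImode destination with operands[3] == 4, the
   loop finds i == 2, so the emitted instruction has the form

     tinsrh	wr0, r1, #2    // register names hypothetical
*/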
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions should scheduler lookahead to choose the
   best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
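/* Example (editorial addition): after arm_order_regs_for_local_alloc on a
   Thumb target, allocation starts with the low registers in the order
   r3, r2, r1, r0, r4, r5, r6, r7, then lr (r14), ip (r12), and finally
   r8-r11, so cheap low registers are preferred first.  */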
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
        return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          /* We don't have the final size of the frame so adjust.  */
          size += 32 * UNITS_PER_WORD;
          if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
            return true;
        }
      else
        return true;
    }

  return false;
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */

int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the
     register list is 8-bit.  Normally this means all registers in the list
     must be LO_REGS, that is (R0 - R7).  If any HI_REGS is used, then we
     must use 32-bit encodings.  There is one exception for PUSH: LR in
     HI_REGS can be used with a 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
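/* For example, "push {r0-r7, lr}" qualifies for the 16-bit encoding
   because LR is the only high register in the list, whereas
   "push {r4, r8}" needs the 32-bit encoding since r8 is in HI_REGS.  */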
/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel
   PARALLEL rtx, RETURN_PC is true if OPERANDS contains a return insn.
   WRITE_BACK_P is true if OPERANDS contains an insn which explicitly
   updates the base register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is
     SP and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
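/* For example, "pop {r4-r7, pc}" stays at 16 bits because PC is the
   permitted high register for POP, while "ldmia r8!, {r0-r3}" needs
   32 bits because the base register is in HI_REGS.  */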
/* Compute the number of instructions emitted by output_move_double.  */

int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ((value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }

  return 0;
}
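/* Worked example: for the constant 0.125 the exact inverse is 8.0,
   which truncates exactly and is a power of two, so the function
   returns exact_log2 (8) == 3, i.e. a fixed-point format with three
   fraction bits.  */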
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
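/* Worked example: 4.0 yields exact_log2 (4) == 2, which is in [1, 32],
   so 2 is returned; 3.0 is rejected because exact_log2 (3) == -1, and
   -8.0 is rejected for being negative.  */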
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  rtx_insn *jump = emit_jump_insn (insn);
  add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
  emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
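/* The "atomic_compare_and_swap<mode>_1" pattern emitted above is split
   after reload into a load-exclusive/store-exclusive retry loop; see
   arm_split_compare_and_swap below.  */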
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32-bit targets and by neg_bval being zero
   for Thumb-1 targets (ie. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_acquire (mod_s));

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.
     Instead, a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
						    label2, cond));
      else
	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
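/* Roughly, for a strong SImode compare-and-swap with a relaxed memory
   model this split produces (illustrative only; the compiler chooses
   the actual registers and labels):

     1:	ldrex	r0, [r2]	@ rval = *mem
	cmp	r0, r3		@ compare against oldval
	bne	2f		@ mismatch: fail with Z clear
	strex	r1, r4, [r2]	@ try to store newval
	cmp	r1, #0
	bne	1b		@ reservation lost: retry
     2:
*/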
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_acquire (model));

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}
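/* Worked example: for V4SImode (a 16-byte mode) on big-endian, lane 0
   is first reversed to lane 3 and then XORed with nelems/2 == 2,
   giving architectural lane 1; on little-endian the lane is returned
   unchanged.  */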
/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal; break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal; break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal; break;
    case V8HFmode:  gen = gen_neon_vuzpv8hf_internal; break;
    case V4HFmode:  gen = gen_neon_vuzpv4hf_internal; break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal; break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal; break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal; break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal; break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal; break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal; break;
    case V8HFmode:  gen = gen_neon_vzipv8hf_internal; break;
    case V4HFmode:  gen = gen_neon_vzipv4hf_internal; break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal; break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal; break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal; break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi; break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi; break;
	case V8HImode:  gen = gen_neon_vrev64v8hi; break;
	case V4HImode:  gen = gen_neon_vrev64v4hi; break;
	case V8HFmode:  gen = gen_neon_vrev64v8hf; break;
	case V4HFmode:  gen = gen_neon_vrev64v4hf; break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi; break;
	case V8HImode:  gen = gen_neon_vrev32v8hi; break;
	case V4HImode:  gen = gen_neon_vrev32v4hi; break;
	case V4SImode:  gen = gen_neon_vrev64v4si; break;
	case V2SImode:  gen = gen_neon_vrev64v2si; break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf; break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf; break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
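/* Worked example: on V8QImode the selector {3,2,1,0,7,6,5,4} gives
   diff == 3 and reverses the bytes within each 32-bit group, so it is
   matched to vrev32.8.  */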
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal; break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal; break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal; break;
    case V8HFmode:  gen = gen_neon_vtrnv8hf_internal; break;
    case V4HFmode:  gen = gen_neon_vtrnv4hf_internal; break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal; break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal; break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal; break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
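/* Worked example: on V4SImode the selector {0,4,2,6} has odd == 0 and
   matches the even-lane half of a VTRN transpose of the two operands;
   {1,5,3,7} would match the odd-lane half.  */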
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi; break;
    case V4HImode:  gen = gen_neon_vextv4hi; break;
    case V8HImode:  gen = gen_neon_vextv8hi; break;
    case V2SImode:  gen = gen_neon_vextv2si; break;
    case V4SImode:  gen = gen_neon_vextv4si; break;
    case V4HFmode:  gen = gen_neon_vextv4hf; break;
    case V8HFmode:  gen = gen_neon_vextv8hf; break;
    case V2SFmode:  gen = gen_neon_vextv2sf; break;
    case V4SFmode:  gen = gen_neon_vextv4sf; break;
    case V2DImode:  gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
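/* Worked example: on V4SImode the selector {1,2,3,4} increases by one
   starting from lane 1, so location == 1 and the permutation is
   matched to a VEXT extracting four lanes starting at lane 1 of the
   concatenated operands.  */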
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 arithmetic-left:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount >= 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
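/* Worked example (constant path): for a logical right shift by 40, the
   amount is >= 32, so adj_amount == 8 and the split emits just
   "out_low = in_high >> 8" and "out_high = 0" -- two insns with no
   branch.  */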
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
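/* For example, (const (plus (symbol_ref "foo") (const_int 42))) is
   accepted, while an addend of 0x8000 is rejected because it cannot
   be represented as a 16-bit signed REL addend.  */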
/* Returns true if COMPARISON is a valid comparison operation, and puts
   the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case SFmode:
    case DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
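/* Worked example: setting 15 bytes at a word-aligned destination
   without strd costs arm_const_inline_cost insns for the value plus
   (15 >> 2) == 3 word stores and leftover[3] == 2 trailing stores
   (strh + strb); when unaligned access is allowed that trailing pair
   is merged into a single str, saving one insn.  */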
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If no fewer than nelt_v8 bytes are left over, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, using
   only instructions allowed by ALIGN on the processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
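
/* Illustrative example: for ALIGN == 2, LENGTH == 7 and VALUE == 0xab,
   the loop above emits three halfword stores (strh) of 0xabab at
   offsets 0, 2 and 4, then a single byte store (strb) for the
   leftover byte at offset 6.  */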
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
        return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
        return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
        {
          addr = plus_constant (Pmode, dst, i);
          mem = adjust_automodify_address (dstbase, DImode, addr, i);
          emit_move_insn (mem, reg);
        }
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
         compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
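
/* A worked example of the value replication above: VALUE == 0xab
   yields V == 0xabababab (doubled again into the high word when strd
   is usable).  For LENGTH == 11 on a word-aligned destination this
   becomes two str instructions followed either by one unaligned str
   covering bytes 7..10, when the trailing strh/strb pair can be
   merged, or by a strh plus strb.  */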
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  First we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
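
/* A sketch of the dispatch above: memset (p, 0xab, 15) with P 4-byte
   aligned is first tried as a NEON block set when the tune prefers
   NEON string operations; on a core without unaligned access and a
   byte-aligned P it goes through arm_block_set_unaligned_non_vect;
   everything else falls back to arm_block_set_aligned_non_vect.  */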
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt)  == (set (zero_extract (reg r0)
                                        (const_int 16)
                                        (const_int 16))
                          (const_int imm16_1))
     or
     prev (movw)  == (set (reg r1)
                          (high (symbol_ref ("SYM"))))
     curr (movt)  == (set (reg r0)
                          (lo_sum (reg r1)
                                  (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
           && REG_P (SET_DEST (curr_set))
           && REG_P (SET_DEST (prev_set))
           && GET_CODE (SET_SRC (prev_set)) == HIGH
           && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
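
/* With this offset, libsanitizer computes shadow addresses on 32-bit
   ARM roughly as shadow = (addr >> 3) + 0x20000000; the constant
   returned here only has to agree with the runtime's expectation.  */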
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((VAR_P (decl_op1)
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (VAR_P (decl_op0)
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}
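
/* For example, (minus (symbol_ref a) (symbol_ref b)) is acceptable in
   debug info when A and B are defined in the same section, because the
   difference is then a link-time constant; across sections the
   distance can be changed by the linker, so it must be rejected.  */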
/* Return TRUE if X is a reference to a value in a constant pool.  */

bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}

/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
                   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
                   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but the previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
              ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected arch %s\n", indent, "",
           TARGET_THUMB2_P (flags) ? "thumb2" :
           TARGET_THUMB_P (flags) ? "thumb1" :
           "arm");

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
                          : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
                          : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
                              false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
                              false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Functions with mode-specific instructions, e.g. using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
        if (TREE_VALUE (args)
            && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
          ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      while (ISSPACE (*q)) ++q;

      argstr = NULL;
      if (!strncmp (q, "thumb", 5))
        opts->x_target_flags |= MASK_THUMB;

      else if (!strncmp (q, "arm", 3))
        opts->x_target_flags &= ~MASK_THUMB;

      else if (!strncmp (q, "fpu=", 4))
        {
          int fpu_index;
          if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
                                       &fpu_index, CL_TARGET))
            {
              error ("invalid fpu for attribute(target(\"%s\"))", q);
              return false;
            }
          if (fpu_index == TARGET_FPU_auto)
            {
              /* This doesn't really make sense until we support
                 general dynamic selection of the architecture and all
                 sub-features.  */
              sorry ("auto fpu selection not currently permitted here");
              return false;
            }
          opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
        }
      else
        {
          error ("attribute(target(\"%s\")) is unknown", q);
          return false;
        }
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
                                 struct gcc_options *opts_set)
{
  tree t;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  t = build_target_option_node (opts);
  arm_configure_build_target (&arm_active_target, TREE_TARGET_OPTION (t),
                              opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  /* Resynchronize the saved target options.  */
  cl_target_option_save (TREE_TARGET_OPTION (t), opts);
  return t;
}
static void
add_attribute (const char *mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
                                        build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
                           build_tree_list (NULL_TREE, value),
                           *attributes);
}
/* For testing.  Insert thumb or arm modes alternately on functions.  */

static void
arm_insert_attributes (tree fndecl, tree *attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
                              tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
                           TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
                            TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
                                              &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
        return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
          || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
              && cfun->is_thunk))
        fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
        fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
        fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
               (TARGET_SOFT_FLOAT
                ? "softvfp"
                : arm_identify_fpu_from_isa (arm_active_target.isa)));

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
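
/* For a Thumb-2 function on a VFPv3 target the directives above come
   out roughly as (illustrative):
       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv3-d16  */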
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and set them to BASE and OFFSET, otherwise return
   false after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and set them to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
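
/* With this scheme, for example, ldr r1, [r4, #4] is ordered before
   ldr r2, [r4, #8] (same base, smaller offset), and both come before
   a load from a higher-numbered base register, while any insn that is
   not a simple load or store keeps MAX_PRI - 1 and never enters a
   fusion pair.  */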
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }              { 0, 1 }
High Mask:        { 0, 1 }              { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
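
/* For instance, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching
   the masks in the diagram above.  */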
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;
      if (flags & SECTION_WRITE)
        *num |= 0x1;
      if (flags & SECTION_CODE)
        *num |= 0x4;
      if (flags & SECTION_MERGE)
        *num |= 0x10;
      if (flags & SECTION_STRINGS)
        *num |= 0x20;
      if (flags & SECTION_TLS)
        *num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;

      return true;
    }

  return false;
}
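
/* For example, an allocated, executable pure-code section yields
   *NUM == 0x20000006, i.e. SHF_ARM_PURECODE (0x20000000) together
   with the numeric equivalents of SHF_ALLOC (0x2) and
   SHF_EXECINSTR (0x4).  */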
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the
     'default' text section, also known as '.text'.  We can preserve
     previous behavior as the default text section already has the
     SHF_ARM_PURECODE section attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like for
         example .text.startup.  We set the pure-code attribute and return the
         same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implement the TARGET_SECTION_TYPE_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate a call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  machine_mode libval_mode
    = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode), MODE_INT);

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode, 2,
                                        op0, GET_MODE (op0),
                                        op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
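
/* Per the ARM run-time ABI, __aeabi_idivmod and friends return the
   quotient and the remainder as a register pair (r0/r1 for SImode),
   which is why a single library call in the double-width mode can be
   split into the two subregs above.  */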
#include "gt-arm.h"