/* Output routines for GCC for ARM.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_lra_p (void);
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
					unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
					     const unsigned char *sel);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL, 0, 0, false, false, false, NULL, false }
};
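
/* For example, user code opts into these entries with GNU attribute
   syntax; a declaration such as

     void f (void) __attribute__ ((long_call));

   forces calls to f to be made indirectly, per the table above.  */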
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_LRA_P
#define TARGET_LRA_P arm_lra_p
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
struct gcc_target targetm = TARGET_INITIALIZER;
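
/* Each TARGET_* macro redefined above overrides the corresponding default
   hook in TARGET_INITIALIZER, so after initialization targetm carries the
   ARM-specific callbacks; e.g. targetm.rtx_costs points at arm_rtx_costs.  */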
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
/* spare              (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */
#define FL_CRC32      (1 << 25)       /* ARMv8 CRC32 instructions.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                 | FL_CO_PROC)
#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A   (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
                         | FL_ARM_DIV | FL_NOTM)
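
/* The FL_FOR_ARCH* masks compose transitively; for instance FL_FOR_ARCH4T
   expands to (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB).  */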
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, l1_size, l1_line_size
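
/* These expand to the three prefetch-related entries of the tune_params
   tables below (prefetch slots, L1 cache size, L1 cache line size); e.g.
   the Cortex-A9 tuning uses ARM_PREFETCH_BENEFICIAL(4,32,32).  */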
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1,		/* scalar_stmt_cost.  */
  1,		/* scalar_load_cost.  */
  1,		/* scalar_store_cost.  */
  1,		/* vec_stmt_cost.  */
  1,		/* vec_to_scalar_cost.  */
  1,		/* scalar_to_vec_cost.  */
  1,		/* vec_align_load_cost.  */
  1,		/* vec_unalign_load_cost.  */
  1,		/* vec_unalign_store_cost.  */
  1,		/* vec_store_cost.  */
  3,		/* cond_taken_branch_cost.  */
  1,		/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =

    COSTS_N_INSNS (1),	/* Shift_reg.  */
    COSTS_N_INSNS (1),	/* Arith_shift.  */
    COSTS_N_INSNS (2),	/* Arith_shift_reg.  */

    COSTS_N_INSNS (1),	/* Log_shift_reg.  */
    COSTS_N_INSNS (1),	/* Extend.  */
    COSTS_N_INSNS (2),	/* Extend_arith.  */
    COSTS_N_INSNS (1),	/* Bfi.  */
    COSTS_N_INSNS (1),	/* Bfx.  */

    true		/* non_exec_costs_exec.  */

    COSTS_N_INSNS (3),	/* Simple.  */
    COSTS_N_INSNS (3),	/* Flag_setting.  */
    COSTS_N_INSNS (2),	/* Extend.  */
    COSTS_N_INSNS (3),	/* Add.  */
    COSTS_N_INSNS (2),	/* Extend_add.  */
    COSTS_N_INSNS (30)	/* Idiv.  No HW div on Cortex A9.  */

    0,			/* Simple (N/A).  */
    0,			/* Flag_setting (N/A).  */
    COSTS_N_INSNS (4),	/* Extend.  */

    COSTS_N_INSNS (4),	/* Extend_add.  */

    COSTS_N_INSNS (2),	/* Load.  */
    COSTS_N_INSNS (2),	/* Load_sign_extend.  */
    COSTS_N_INSNS (2),	/* Ldrd.  */
    COSTS_N_INSNS (2),	/* Ldm_1st.  */
    1,			/* Ldm_regs_per_insn_1st.  */
    2,			/* Ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* Loadf.  */
    COSTS_N_INSNS (5),	/* Loadd.  */
    COSTS_N_INSNS (1),	/* Load_unaligned.  */
    COSTS_N_INSNS (2),	/* Store.  */
    COSTS_N_INSNS (2),	/* Strd.  */
    COSTS_N_INSNS (2),	/* Stm_1st.  */
    1,			/* Stm_regs_per_insn_1st.  */
    2,			/* Stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* Storef.  */
    COSTS_N_INSNS (1),	/* Stored.  */
    COSTS_N_INSNS (1)	/* Store_unaligned.  */

    COSTS_N_INSNS (14),	/* Div.  */
    COSTS_N_INSNS (4),	/* Mult.  */
    COSTS_N_INSNS (7),	/* Mult_addsub.  */
    COSTS_N_INSNS (30),	/* Fma.  */
    COSTS_N_INSNS (3),	/* Addsub.  */
    COSTS_N_INSNS (1),	/* Fpconst.  */
    COSTS_N_INSNS (1),	/* Neg.  */
    COSTS_N_INSNS (3),	/* Compare.  */
    COSTS_N_INSNS (3),	/* Widen.  */
    COSTS_N_INSNS (3),	/* Narrow.  */
    COSTS_N_INSNS (3),	/* Toint.  */
    COSTS_N_INSNS (3),	/* Fromint.  */
    COSTS_N_INSNS (3)	/* Roundint.  */

    COSTS_N_INSNS (24),	/* Div.  */
    COSTS_N_INSNS (5),	/* Mult.  */
    COSTS_N_INSNS (8),	/* Mult_addsub.  */
    COSTS_N_INSNS (30),	/* Fma.  */
    COSTS_N_INSNS (3),	/* Addsub.  */
    COSTS_N_INSNS (1),	/* Fpconst.  */
    COSTS_N_INSNS (1),	/* Neg.  */
    COSTS_N_INSNS (3),	/* Compare.  */
    COSTS_N_INSNS (3),	/* Widen.  */
    COSTS_N_INSNS (3),	/* Narrow.  */
    COSTS_N_INSNS (3),	/* Toint.  */
    COSTS_N_INSNS (3),	/* Fromint.  */
    COSTS_N_INSNS (3)	/* Roundint.  */

    COSTS_N_INSNS (1)	/* Alu.  */
const struct cpu_cost_table cortexa7_extra_costs =

    COSTS_N_INSNS (1),	/* Shift.  */
    COSTS_N_INSNS (1),	/* Shift_reg.  */
    COSTS_N_INSNS (1),	/* Arith_shift.  */
    COSTS_N_INSNS (1),	/* Arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* Log_shift.  */
    COSTS_N_INSNS (1),	/* Log_shift_reg.  */
    COSTS_N_INSNS (1),	/* Extend.  */
    COSTS_N_INSNS (1),	/* Extend_arith.  */
    COSTS_N_INSNS (1),	/* Bfi.  */
    COSTS_N_INSNS (1),	/* Bfx.  */
    COSTS_N_INSNS (1),	/* Clz.  */

    true		/* non_exec_costs_exec.  */

    COSTS_N_INSNS (1),	/* Flag_setting.  */
    COSTS_N_INSNS (1),	/* Extend.  */
    COSTS_N_INSNS (1),	/* Add.  */
    COSTS_N_INSNS (1),	/* Extend_add.  */
    COSTS_N_INSNS (7)	/* Idiv.  */

    0,			/* Simple (N/A).  */
    0,			/* Flag_setting (N/A).  */
    COSTS_N_INSNS (1),	/* Extend.  */

    COSTS_N_INSNS (2),	/* Extend_add.  */

    COSTS_N_INSNS (1),	/* Load.  */
    COSTS_N_INSNS (1),	/* Load_sign_extend.  */
    COSTS_N_INSNS (3),	/* Ldrd.  */
    COSTS_N_INSNS (1),	/* Ldm_1st.  */
    1,			/* Ldm_regs_per_insn_1st.  */
    2,			/* Ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* Loadf.  */
    COSTS_N_INSNS (2),	/* Loadd.  */
    COSTS_N_INSNS (1),	/* Load_unaligned.  */
    COSTS_N_INSNS (1),	/* Store.  */
    COSTS_N_INSNS (3),	/* Strd.  */
    COSTS_N_INSNS (1),	/* Stm_1st.  */
    1,			/* Stm_regs_per_insn_1st.  */
    2,			/* Stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* Storef.  */
    COSTS_N_INSNS (2),	/* Stored.  */
    COSTS_N_INSNS (1)	/* Store_unaligned.  */

    COSTS_N_INSNS (15),	/* Div.  */
    COSTS_N_INSNS (3),	/* Mult.  */
    COSTS_N_INSNS (7),	/* Mult_addsub.  */
    COSTS_N_INSNS (7),	/* Fma.  */
    COSTS_N_INSNS (3),	/* Addsub.  */
    COSTS_N_INSNS (3),	/* Fpconst.  */
    COSTS_N_INSNS (3),	/* Neg.  */
    COSTS_N_INSNS (3),	/* Compare.  */
    COSTS_N_INSNS (3),	/* Widen.  */
    COSTS_N_INSNS (3),	/* Narrow.  */
    COSTS_N_INSNS (3),	/* Toint.  */
    COSTS_N_INSNS (3),	/* Fromint.  */
    COSTS_N_INSNS (3)	/* Roundint.  */

    COSTS_N_INSNS (30),	/* Div.  */
    COSTS_N_INSNS (6),	/* Mult.  */
    COSTS_N_INSNS (10),	/* Mult_addsub.  */
    COSTS_N_INSNS (7),	/* Fma.  */
    COSTS_N_INSNS (3),	/* Addsub.  */
    COSTS_N_INSNS (3),	/* Fpconst.  */
    COSTS_N_INSNS (3),	/* Neg.  */
    COSTS_N_INSNS (3),	/* Compare.  */
    COSTS_N_INSNS (3),	/* Widen.  */
    COSTS_N_INSNS (3),	/* Narrow.  */
    COSTS_N_INSNS (3),	/* Toint.  */
    COSTS_N_INSNS (3),	/* Fromint.  */
    COSTS_N_INSNS (3)	/* Roundint.  */

    COSTS_N_INSNS (1)	/* Alu.  */
const struct cpu_cost_table cortexa12_extra_costs =

    COSTS_N_INSNS (1),	/* Shift_reg.  */
    COSTS_N_INSNS (1),	/* Arith_shift.  */
    COSTS_N_INSNS (1),	/* Arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* Log_shift.  */
    COSTS_N_INSNS (1),	/* Log_shift_reg.  */

    COSTS_N_INSNS (1),	/* Extend_arith.  */

    COSTS_N_INSNS (1),	/* Bfx.  */
    COSTS_N_INSNS (1),	/* Clz.  */

    true		/* non_exec_costs_exec.  */

    COSTS_N_INSNS (2),	/* Simple.  */
    COSTS_N_INSNS (3),	/* Flag_setting.  */
    COSTS_N_INSNS (2),	/* Extend.  */
    COSTS_N_INSNS (3),	/* Add.  */
    COSTS_N_INSNS (2),	/* Extend_add.  */
    COSTS_N_INSNS (18)	/* Idiv.  */

    0,			/* Simple (N/A).  */
    0,			/* Flag_setting (N/A).  */
    COSTS_N_INSNS (3),	/* Extend.  */

    COSTS_N_INSNS (3),	/* Extend_add.  */

    COSTS_N_INSNS (3),	/* Load.  */
    COSTS_N_INSNS (3),	/* Load_sign_extend.  */
    COSTS_N_INSNS (3),	/* Ldrd.  */
    COSTS_N_INSNS (3),	/* Ldm_1st.  */
    1,			/* Ldm_regs_per_insn_1st.  */
    2,			/* Ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* Loadf.  */
    COSTS_N_INSNS (3),	/* Loadd.  */
    0,			/* Load_unaligned.  */

    1,			/* Stm_regs_per_insn_1st.  */
    2,			/* Stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* Storef.  */
    COSTS_N_INSNS (2),	/* Stored.  */
    0			/* Store_unaligned.  */

    COSTS_N_INSNS (17),	/* Div.  */
    COSTS_N_INSNS (4),	/* Mult.  */
    COSTS_N_INSNS (8),	/* Mult_addsub.  */
    COSTS_N_INSNS (8),	/* Fma.  */
    COSTS_N_INSNS (4),	/* Addsub.  */
    COSTS_N_INSNS (2),	/* Fpconst.  */
    COSTS_N_INSNS (2),	/* Neg.  */
    COSTS_N_INSNS (2),	/* Compare.  */
    COSTS_N_INSNS (4),	/* Widen.  */
    COSTS_N_INSNS (4),	/* Narrow.  */
    COSTS_N_INSNS (4),	/* Toint.  */
    COSTS_N_INSNS (4),	/* Fromint.  */
    COSTS_N_INSNS (4)	/* Roundint.  */

    COSTS_N_INSNS (31),	/* Div.  */
    COSTS_N_INSNS (4),	/* Mult.  */
    COSTS_N_INSNS (8),	/* Mult_addsub.  */
    COSTS_N_INSNS (8),	/* Fma.  */
    COSTS_N_INSNS (4),	/* Addsub.  */
    COSTS_N_INSNS (2),	/* Fpconst.  */
    COSTS_N_INSNS (2),	/* Neg.  */
    COSTS_N_INSNS (2),	/* Compare.  */
    COSTS_N_INSNS (4),	/* Widen.  */
    COSTS_N_INSNS (4),	/* Narrow.  */
    COSTS_N_INSNS (4),	/* Toint.  */
    COSTS_N_INSNS (4),	/* Fromint.  */
    COSTS_N_INSNS (4)	/* Roundint.  */

    COSTS_N_INSNS (1)	/* Alu.  */
const struct cpu_cost_table cortexa15_extra_costs =

    COSTS_N_INSNS (1),	/* Arith_shift.  */
    COSTS_N_INSNS (1),	/* Arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* Log_shift.  */
    COSTS_N_INSNS (1),	/* Log_shift_reg.  */

    COSTS_N_INSNS (1),	/* Extend_arith.  */
    COSTS_N_INSNS (1),	/* Bfi.  */

    true		/* non_exec_costs_exec.  */

    COSTS_N_INSNS (2),	/* Simple.  */
    COSTS_N_INSNS (3),	/* Flag_setting.  */
    COSTS_N_INSNS (2),	/* Extend.  */
    COSTS_N_INSNS (2),	/* Add.  */
    COSTS_N_INSNS (2),	/* Extend_add.  */
    COSTS_N_INSNS (18)	/* Idiv.  */

    0,			/* Simple (N/A).  */
    0,			/* Flag_setting (N/A).  */
    COSTS_N_INSNS (3),	/* Extend.  */

    COSTS_N_INSNS (3),	/* Extend_add.  */

    COSTS_N_INSNS (3),	/* Load.  */
    COSTS_N_INSNS (3),	/* Load_sign_extend.  */
    COSTS_N_INSNS (3),	/* Ldrd.  */
    COSTS_N_INSNS (4),	/* Ldm_1st.  */
    1,			/* Ldm_regs_per_insn_1st.  */
    2,			/* Ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* Loadf.  */
    COSTS_N_INSNS (4),	/* Loadd.  */
    0,			/* Load_unaligned.  */

    COSTS_N_INSNS (1),	/* Stm_1st.  */
    1,			/* Stm_regs_per_insn_1st.  */
    2,			/* Stm_regs_per_insn_subsequent.  */

    0			/* Store_unaligned.  */

    COSTS_N_INSNS (17),	/* Div.  */
    COSTS_N_INSNS (4),	/* Mult.  */
    COSTS_N_INSNS (8),	/* Mult_addsub.  */
    COSTS_N_INSNS (8),	/* Fma.  */
    COSTS_N_INSNS (4),	/* Addsub.  */
    COSTS_N_INSNS (2),	/* Fpconst.  */
    COSTS_N_INSNS (2),	/* Neg.  */
    COSTS_N_INSNS (5),	/* Compare.  */
    COSTS_N_INSNS (4),	/* Widen.  */
    COSTS_N_INSNS (4),	/* Narrow.  */
    COSTS_N_INSNS (4),	/* Toint.  */
    COSTS_N_INSNS (4),	/* Fromint.  */
    COSTS_N_INSNS (4)	/* Roundint.  */

    COSTS_N_INSNS (31),	/* Div.  */
    COSTS_N_INSNS (4),	/* Mult.  */
    COSTS_N_INSNS (8),	/* Mult_addsub.  */
    COSTS_N_INSNS (8),	/* Fma.  */
    COSTS_N_INSNS (4),	/* Addsub.  */
    COSTS_N_INSNS (2),	/* Fpconst.  */
    COSTS_N_INSNS (2),	/* Neg.  */
    COSTS_N_INSNS (2),	/* Compare.  */
    COSTS_N_INSNS (4),	/* Widen.  */
    COSTS_N_INSNS (4),	/* Narrow.  */
    COSTS_N_INSNS (4),	/* Toint.  */
    COSTS_N_INSNS (4),	/* Fromint.  */
    COSTS_N_INSNS (4)	/* Roundint.  */

    COSTS_N_INSNS (1)	/* Alu.  */
const struct cpu_cost_table v7m_extra_costs =

    0,			/* Arith_shift.  */
    COSTS_N_INSNS (1),	/* Arith_shift_reg.  */

    COSTS_N_INSNS (1),	/* Log_shift_reg.  */

    COSTS_N_INSNS (1),	/* Extend_arith.  */

    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */

    COSTS_N_INSNS (1),	/* Simple.  */
    COSTS_N_INSNS (1),	/* Flag_setting.  */
    COSTS_N_INSNS (2),	/* Extend.  */
    COSTS_N_INSNS (1),	/* Add.  */
    COSTS_N_INSNS (3),	/* Extend_add.  */
    COSTS_N_INSNS (8)	/* Idiv.  */

    0,			/* Simple (N/A).  */
    0,			/* Flag_setting (N/A).  */
    COSTS_N_INSNS (2),	/* Extend.  */

    COSTS_N_INSNS (3),	/* Extend_add.  */

    COSTS_N_INSNS (2),	/* Load.  */
    0,			/* Load_sign_extend.  */
    COSTS_N_INSNS (3),	/* Ldrd.  */
    COSTS_N_INSNS (2),	/* Ldm_1st.  */
    1,			/* Ldm_regs_per_insn_1st.  */
    1,			/* Ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* Loadf.  */
    COSTS_N_INSNS (3),	/* Loadd.  */
    COSTS_N_INSNS (1),	/* Load_unaligned.  */
    COSTS_N_INSNS (2),	/* Store.  */
    COSTS_N_INSNS (3),	/* Strd.  */
    COSTS_N_INSNS (2),	/* Stm_1st.  */
    1,			/* Stm_regs_per_insn_1st.  */
    1,			/* Stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* Storef.  */
    COSTS_N_INSNS (3),	/* Stored.  */
    COSTS_N_INSNS (1)	/* Store_unaligned.  */

    COSTS_N_INSNS (7),	/* Div.  */
    COSTS_N_INSNS (2),	/* Mult.  */
    COSTS_N_INSNS (5),	/* Mult_addsub.  */
    COSTS_N_INSNS (3),	/* Fma.  */
    COSTS_N_INSNS (1),	/* Addsub.  */

    COSTS_N_INSNS (15),	/* Div.  */
    COSTS_N_INSNS (5),	/* Mult.  */
    COSTS_N_INSNS (7),	/* Mult_addsub.  */
    COSTS_N_INSNS (7),	/* Fma.  */
    COSTS_N_INSNS (3),	/* Addsub.  */

    COSTS_N_INSNS (1)	/* Alu.  */
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,				/* Sched adj cost.  */
  3,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};
const struct tune_params arm_9e_tune =
{
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_v6t2_tune =
{
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};
const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,				/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,				/* Scheduler cost adjustment.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  1,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {false, false},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,				/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {false, false},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {false, false},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_fa726te_tune =
{
  fa726te_sched_adjust_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false				/* Prefer Neon for 64-bits bitops.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE

  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */
#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH

  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
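
/* For example, bit_count (0x29) iterates three times
   (0x29 -> 0x28 -> 0x20 -> 0) and returns 3: the loop runs once per set
   bit rather than once per bit position.  */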
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
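
/* For example, arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
   registers the libcall "__gnu_addsq3"; passing a NUM_SUFFIX of 0 would
   drop the trailing operand count and give "__gnu_addsq".  */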
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
			    enum machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
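
/* For example, a fract_optab conversion from SQmode to DQmode is named
   "__gnu_fractsqdq2" (both modes are fract with the same signedness, so
   the "2" suffix applies), while SQmode to SImode gives "__gnu_fractsqsi"
   (an integer endpoint suppresses the suffix).  */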
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     callee-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified some command line
	 switch that requires certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  if (TARGET_LITTLE_WORDS)
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
	     "will be removed in a future release");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;

  if (arm_restrict_it == 2)
    arm_restrict_it = arm_arch8 && TARGET_THUMB2;

  if (!TARGET_THUMB2)
    arm_restrict_it = 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable();
    }

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have
     deemed it beneficial (signified by setting num_prefetch_slots to 1 or
     more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->num_prefetch_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
    flag_shrink_wrap = false;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
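
/* For example, compiling with -mcpu=cortex-a9 -mtune=cortex-a15 leaves
   arm_selected_cpu (and hence insn_flags) describing the Cortex-A9, while
   current_tune points at the Cortex-A15 tuning tables chosen above.  */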
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *              arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
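
/* For example, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   reaches arm_isr_value with the string "IRQ" and is classified as
   ARM_FT_ISR; omitting the argument also defaults to IRQ.  */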
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
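
/* For example, a function declared as

     void start (void) __attribute__ ((naked));

   must supply its own prologue and return sequence (typically in inline
   asm), so both the argument stack-slot allocation and the missing-return
   warning above are suppressed for it.  */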
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
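
/* Note the offsets used above: on 32-bit (ARM/Thumb-2) targets the static
   chain and function address land at bytes 8 and 12 of the trampoline,
   which are exactly the two .word slots following the pair of loads
   emitted by arm_asm_trampoline_template.  */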
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
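
/* For example, in ARM mode 0x000003fc is valid (0xff rotated by an even
   amount) while 0x000001fe is not, because its rotation would be odd.  In
   Thumb-2 mode 0x000001fe is accepted (shifts need not be even), as is
   the replicated pattern 0x00ff00ff.  */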
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */

    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
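
/* For example, const_ok_for_op (0xffffff00, AND) succeeds even though
   0xffffff00 itself is not encodable: the complement 0xff is a valid
   immediate, so the operation can be emitted as a single BIC.  */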
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  if (TARGET_THUMB1)
    return 0;

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	      && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
3481 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3482 struct four_ints
*return_sequence
, int i
)
3484 int remainder
= val
& 0xffffffff;
3487 /* Try and find a way of doing the job in either two or three
3490 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3491 location. We start at position I. This may be the MSB, or
3492 optimial_immediate_sequence may have positioned it at the largest block
3493 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3494 wrapping around to the top of the word when we drop off the bottom.
3495 In the worst case this code should produce no more than four insns.
3497 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3498 constants, shifted to any arbitrary location. We should always start
3503 unsigned int b1
, b2
, b3
, b4
;
3504 unsigned HOST_WIDE_INT result
;
3507 gcc_assert (insns
< 4);
3512 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3513 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3516 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3517 /* We can use addw/subw for the last 12 bits. */
3521 /* Use an 8-bit shifted/rotated immediate. */
3525 result
= remainder
& ((0x0ff << end
)
3526 | ((i
< end
) ? (0xff >> (32 - end
))
3533 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3534 arbitrary shifts. */
3535 i
-= TARGET_ARM
? 2 : 1;
3539 /* Next, see if we can do a better job with a thumb2 replicated
3542 We do it this way around to catch the cases like 0x01F001E0 where
3543 two 8-bit immediates would work, but a replicated constant would
3546 TODO: 16-bit constants that don't clear all the bits, but still win.
3547 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3550 b1
= (remainder
& 0xff000000) >> 24;
3551 b2
= (remainder
& 0x00ff0000) >> 16;
3552 b3
= (remainder
& 0x0000ff00) >> 8;
3553 b4
= remainder
& 0xff;
3557 /* The 8-bit immediate already found clears b1 (and maybe b2),
3558 but must leave b3 and b4 alone. */
3560 /* First try to find a 32-bit replicated constant that clears
3561 almost everything. We can assume that we can't do it in one,
3562 or else we wouldn't be here. */
3563 unsigned int tmp
= b1
& b2
& b3
& b4
;
3564 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3566 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3567 + (tmp
== b3
) + (tmp
== b4
);
3569 && (matching_bytes
>= 3
3570 || (matching_bytes
== 2
3571 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3573 /* At least 3 of the bytes match, and the fourth has at
3574 least as many bits set, or two of the bytes match
3575 and it will only require one more insn to finish. */
3583 /* Second, try to find a 16-bit replicated constant that can
3584 leave three of the bytes clear. If b2 or b4 is already
3585 zero, then we can. If the 8-bit from above would not
3586 clear b2 anyway, then we still win. */
3587 else if (b1
== b3
&& (!b2
|| !b4
3588 || (remainder
& 0x00ff0000 & ~result
)))
3590 result
= remainder
& 0xff00ff00;
3596 /* The 8-bit immediate already found clears b2 (and maybe b3)
3597 and we don't get here unless b1 is alredy clear, but it will
3598 leave b4 unchanged. */
3600 /* If we can clear b2 and b4 at once, then we win, since the
3601 8-bits couldn't possibly reach that far. */
3604 result
= remainder
& 0x00ff00ff;
3610 return_sequence
->i
[insns
++] = result
;
3611 remainder
&= ~result
;
3613 if (code
== SET
|| code
== MINUS
)
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
3633 /* As above, but extra parameter GENERATE which, if clear, suppresses
3637 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3638 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3643 int final_invert
= 0;
3645 int set_sign_bit_copies
= 0;
3646 int clear_sign_bit_copies
= 0;
3647 int clear_zero_bit_copies
= 0;
3648 int set_zero_bit_copies
= 0;
3649 int insns
= 0, neg_insns
, inv_insns
;
3650 unsigned HOST_WIDE_INT temp1
, temp2
;
3651 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3652 struct four_ints
*immediates
;
3653 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (VOIDmode, target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}
      return 1;
    }
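  /* Worked example (editor's note; values chosen for illustration):
     for "x & 0x3ff" on Thumb-2, exact_log2 (0x3ff + 1) gives i == 10
     and a single "ubfx rD, rS, #0, #10" suffices; for "x & 0xffff"
     with i == 16 on ARMv6, the zero_extendhisi2 path emits
     "uxth rD, rS" instead.  */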
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zeros.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }
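  /* Worked example (editor's note): for remainder == 0x0003fff0 the
     four loops above give clear_sign_bit_copies == 14 (bits 31..18
     clear), set_sign_bit_copies == 0, clear_zero_bit_copies == 4
     (bits 3..0 clear) and set_zero_bit_copies == 0.  */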
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}
      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}
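      /* Worked example (editor's note): for remainder == 0x00ffff00,
	 topshift == 8, so temp1 becomes 0x01000000 (after the borrow
	 adjustment) and temp2 == 0x00000100; both are valid immediates,
	 so the value is built as "mov rT, #0x01000000" followed by
	 "sub rD, rT, #0x100".  */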
      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (VOIDmode, target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (VOIDmode, target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;
    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, sub,
						   GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /* Convert
	 x = y | constant (which is composed of set_sign_bit_copies of
	 leading 1s and the remainder 0s, e.g. 0xfff00000) to
	 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).

	 This can be done in 2 instructions by using shifts with mov or mvn.
	 e.g. for
	 x = x | 0xfff00000;
	 we generate.
	 mvn	r0, r0, asl #12
	 mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	 x = y | constant (which has set_zero_bit_copies number of trailing ones)
	 to
	 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	 e.g. for r0 = r0 | 0xfff
	      mvn	r0, r0, lsr #12
	      mvn	r0, r0, asl #12  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;
    case AND:
      /* See if two shifts will do two or more insns' worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}
      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    can_invert = 0;

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond,
			      gen_rtx_SET (VOIDmode, new_src,
					   temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
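/* Worked example (editor's note): for "x & 0xfffffeff" the positive
   sequence is ruled out above (AND is never split), but remainder ^
   0xffffffff == 0x100 needs a single immediate, so the inverted
   sequence wins and the loop emits one "bic rD, rS, #0x100".  */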
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  enum machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      rtx tem = *op0;
	      *op0 = *op1;
	      *op1 = tem;
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      break;
    }
}
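/* Worked example (editor's note): "x > 0xfffff" cannot use its
   constant directly (0xfffff is not a valid immediate), but 0x100000
   is, so the comparison is rewritten as "x >= 0x100000" by the GT/LE
   case above and becomes a single "cmp x, #0x100000".  */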
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : typed_noop_remove <rtx_def>
{
  typedef rtx_def value_type;
  typedef rtx_def compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
  static inline void remove (value_type *);
};

inline bool
libcall_hasher::equal (const value_type *p1, const compare_type *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const value_type *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type htab, rtx libcall)
{
  *htab.find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type libcall_htab;

  if (!init_done)
    {
      init_done = true;

      libcall_htab.create (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
    }

  return libcall && libcall_htab.find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (enum machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_VFP
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
static const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
       to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
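/* Worked example (editor's note): for "struct { float x; float y; }"
   the RECORD_TYPE arm above sums two REAL_TYPE sub-counts and returns
   2 with *modep == SFmode, i.e. a homogeneous aggregate that fits in
   two consecutive VFP registers (s0, s1).  */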
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       enum machine_mode mode, const_tree type,
				       enum machine_mode *base_mode, int *count)
{
  enum machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}

static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       enum machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  enum machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    enum machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       enum machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      enum machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call; this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };
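/* Editor's note: for the single VFP slot above, AAPCS_CP(vfp) expands
   to the aggregate { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
   aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
   aapcs_vfp_allocate_return_reg, aapcs_vfp_advance }, matching the
   field order of the struct.  */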
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

static rtx
aapcs_libcall_value (enum machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2 (mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
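/* Worked example (editor's note): for f(int a, double d) under the
   soft-float AAPCS base variant, "a" takes r0, leaving ncrn == 1;
   rule C3 then rounds ncrn up to 2 for the doubleword-aligned "d",
   which occupies the r2/r3 pair and leaves r1 unused.  */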
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return true if we use LRA instead of reload pass.  */
static bool
arm_lra_p (void)
{
  return arm_lra_flag;
}

/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
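/* Worked example (editor's note): DImode carries 64-bit alignment
   while PARM_BOUNDARY is 32, so "long long" (and any type with
   __attribute__((aligned (8)))) answers true here, which is what
   triggers rule C3's even-register rounding above and the pairing in
   arm_function_arg below.  */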
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
	  ? DOUBLEWORD_ALIGNMENT
	  : PARM_BOUNDARY);
}

static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
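/* Worked example (editor's note): with r0/r1 already taken, a 16-byte
   struct passed by value gets r2/r3 plus 8 bytes of stack; the value
   returned here is the 8 register-resident bytes, mirroring rule C5's
   split in aapcs_layout_arg.  */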
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* Cannot tail-call to long calls, since these are out of range of
     a branch instruction.  */
  if (decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx seq, insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}
rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  rtx pat;
	  rtx mem;

	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register ();

	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

	  /* Make the MEM as close to a constant as possible.  */
	  mem = SET_SRC (pat);
	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
	  MEM_READONLY_P (mem) = 1;
	  MEM_NOTRAP_P (mem) = 1;

	  insn = emit_insn (pat);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg--)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
	  ? crtl->args.info.aapcs_ncrn < 4
	  : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg,
						    labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx,
						    labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
			       UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static bool
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* ??? Can we assume ldrd for thumb2?  */
	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
	  /* ldrd supports offsets of +-1020.
	     However the ldr fallback does not.  */
	  return val > -256 && val < 256 && (val & 3) == 0;
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
static bool
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),
			      UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != 0);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode,
				   gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),
			      UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode
	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only
	 and hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
int
arm_legitimize_reload_address (rtx *p,
			       enum machine_mode mode,
			       int opnum, int type,
			       int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && REG_P (XEXP (XEXP (*p, 0), 0))
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return 1;
    }

  if (GET_CODE (*p) == PLUS
      && REG_P (XEXP (*p, 0))
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
      /* If the base register is equivalent to a constant, let the generic
	 code handle it.  Otherwise we will run into problems if a future
	 reload pass decides to rematerialize the constant.  */
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
      HOST_WIDE_INT low, high;

      /* Detect coprocessor load/stores.  */
      bool coproc_p = ((TARGET_HARD_FLOAT
			&& TARGET_VFP
			&& (mode == SFmode || mode == DFmode))
		       || (TARGET_REALLY_IWMMXT
			   && VALID_IWMMXT_REG_MODE (mode))
		       || (TARGET_NEON
			   && (VALID_NEON_DREG_MODE (mode)
			       || VALID_NEON_QREG_MODE (mode))));

      /* For some conditions, bail out when lower two bits are unaligned.  */
      if ((val & 0x3) != 0
	  /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
	  && (coproc_p
	      /* For DI, and DF under soft-float: */
	      || ((mode == DImode || mode == DFmode)
		  /* Without ldrd, we use stm/ldm, which does not
		     fare well with unaligned bits.  */
		  && (! TARGET_LDRD
		      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
		      || TARGET_THUMB2))))
	return 0;

      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
	 of which the (reg+high) gets turned into a reload add insn,
	 we try to decompose the index into high/low values that can often
	 also lead to better reload CSE.
	 For example:
	         ldr r0, [r2, #4100]   // Offset too large
		 ldr r1, [r2, #4104]   // Offset too large

	 is best reloaded as:
	         add t1, r2, #4096
		 ldr r0, [t1, #4]
		 add t2, r2, #4096
		 ldr r1, [t2, #8]

	 which post-reload CSE can simplify in most cases to eliminate the
	 second add instruction:
	         add t1, r2, #4096
		 ldr r0, [t1, #4]
		 ldr r1, [t1, #8]

	 The idea here is that we want to split out the bits of the constant
	 as a mask, rather than as subtracting the maximum offset that the
	 respective type of load/store used can handle.

	 When encountering negative offsets, we can still utilize it even if
	 the overall offset is positive; sometimes this may lead to an immediate
	 that can be constructed with fewer instructions.
	 For example:
	         ldr r0, [r2, #0x3FFFFC]

	 This is best reloaded as:
	         add t1, r2, #0x400000
		 ldr r0, [t1, #-4]

	 The trick for spotting this for a load insn with N bits of offset
	 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
	 negative offset that is going to make bit N and all the bits below
	 it become zero in the remainder part.

	 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
	 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
	 used in most cases of ARM load/store instructions.  */
#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
  (((VAL) & ((1 << (N)) - 1))						\
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
   : 0)
      if (coproc_p)
	{
	  low = SIGN_MAG_LOW_ADDR_BITS (val, 10);

	  /* NEON quad-word load/stores are made of two double-word accesses,
	     so the valid index range is reduced by 8.  Treat as 9-bit range
	     if we go over it.  */
	  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
	    low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
	}
      else if (GET_MODE_SIZE (mode) == 8)
	{
	  if (TARGET_LDRD)
	    low = (TARGET_THUMB2
		   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
		   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
	  else
	    /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
	       to access doublewords.  The supported load/store offsets are
	       -8, -4, and 4, which we try to produce here.  */
	    low = ((val & 0xf) ^ 0x8) - 0x8;
	}
      else if (GET_MODE_SIZE (mode) < 8)
	{
	  /* NEON element load/stores do not have an offset.  */
	  if (TARGET_NEON_FP16 && mode == HFmode)
	    return 0;

	  if (TARGET_THUMB2)
	    {
	      /* Thumb-2 has an asymmetrical index range of (-256,4096).
		 Try the wider 12-bit range first, and re-try if the result
		 is out of range.  */
	      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
	      if (low < -255)
		low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
	    }
	  else
	    {
	      if (mode == HImode || mode == HFmode)
		{
		  if (arm_arch4)
		    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
		  else
		    {
		      /* The storehi/movhi_bytes fallbacks can use only
			 [-4094,+4094] of the full ldrb/strb index range.  */
		      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
		      if (low == 4095 || low == -4095)
			return 0;
		    }
		}
	      else
		low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
	    }
	}
      else
	return 0;

      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
	       ^ (unsigned HOST_WIDE_INT) 0x80000000)
	      - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero */
      if (low == 0 || high == 0 || (high + low != val))
	return 0;

      /* Reload the high part into a base reg; leave the low part
	 in the mem.
	 Note that replacing this gen_rtx_PLUS with plus_constant is
	 wrong in this case because we rely on the
	 (plus (plus reg c1) c2) structure being preserved so that
	 XEXP (*p, 0) in push_reload below uses the correct term.  */
      *p = gen_rtx_PLUS (GET_MODE (*p),
			 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
				       GEN_INT (high)),
			 GEN_INT (low));
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return 1;
    }

  return 0;
}
rtx
thumb_legitimize_reload_address (rtx *x_p,
				 enum machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return x;
    }

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return x;
    }

  return NULL;
}
/* Test for various thread-local symbols.  */

/* Return TRUE if X is a thread-local symbol.  */

static bool
arm_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Helper for arm_tls_referenced_p.  */

static int
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
{
  /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
7976 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
7978 enum machine_mode mode
= GET_MODE (x
);
7979 enum rtx_code subcode
;
7981 enum rtx_code code
= GET_CODE (x
);
7987 /* Memory costs quite a lot for the first word, but subsequent words
7988 load at the equivalent of a single insn each. */
7989 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7996 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
7997 *total
= COSTS_N_INSNS (2);
7998 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
7999 *total
= COSTS_N_INSNS (4);
8001 *total
= COSTS_N_INSNS (20);
8005 if (REG_P (XEXP (x
, 1)))
8006 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8007 else if (!CONST_INT_P (XEXP (x
, 1)))
8008 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8014 *total
+= COSTS_N_INSNS (4);
8019 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8020 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8023 *total
+= COSTS_N_INSNS (3);
8027 *total
+= COSTS_N_INSNS (1);
8028 /* Increase the cost of complex shifts because they aren't any faster,
8029 and reduce dual issue opportunities. */
8030 if (arm_tune_cortex_a9
8031 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8039 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8040 if (CONST_INT_P (XEXP (x
, 0))
8041 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8043 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8047 if (CONST_INT_P (XEXP (x
, 1))
8048 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8050 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8057 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8059 if (TARGET_HARD_FLOAT
8061 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8063 *total
= COSTS_N_INSNS (1);
8064 if (CONST_DOUBLE_P (XEXP (x
, 0))
8065 && arm_const_double_rtx (XEXP (x
, 0)))
8067 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8071 if (CONST_DOUBLE_P (XEXP (x
, 1))
8072 && arm_const_double_rtx (XEXP (x
, 1)))
8074 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8080 *total
= COSTS_N_INSNS (20);
8084 *total
= COSTS_N_INSNS (1);
8085 if (CONST_INT_P (XEXP (x
, 0))
8086 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8088 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8092 subcode
= GET_CODE (XEXP (x
, 1));
8093 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8094 || subcode
== LSHIFTRT
8095 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8097 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8098 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8102 /* A shift as a part of RSB costs no more than RSB itself. */
8103 if (GET_CODE (XEXP (x
, 0)) == MULT
8104 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8106 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8107 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8112 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8114 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8115 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8119 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8120 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8122 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8123 if (REG_P (XEXP (XEXP (x
, 1), 0))
8124 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8125 *total
+= COSTS_N_INSNS (1);
8133 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8134 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8135 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8137 *total
= COSTS_N_INSNS (1);
8138 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8140 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8144 /* MLA: All arguments must be registers. We filter out
8145 multiplication by a power of two, so that we fall down into
      if (GET_CODE (XEXP (x, 0)) == MULT
          && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        /* The cost comes from the cost of the multiply.  */

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (CONST_DOUBLE_P (XEXP (x, 1))
                  && arm_const_double_rtx (XEXP (x, 1)))
                *total += rtx_cost (XEXP (x, 0), code, 0, speed);
            }

          *total = COSTS_N_INSNS (20);
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
          if (REG_P (XEXP (XEXP (x, 0), 0))
              && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);
        }

    case AND: case XOR: case IOR:
      /* Normally the frame registers will be split into reg+const during
         reload, so it is a bad idea to combine them with other instructions,
         since then they might not be moved outside of loops.  As a compromise
         we allow integration with ops that have a constant as their second
         operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
          && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
          && !CONST_INT_P (XEXP (x, 1)))
        *total = COSTS_N_INSNS (1);

      *total += COSTS_N_INSNS (2);
      if (CONST_INT_P (XEXP (x, 1))
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
        }

      if (subcode == UMIN || subcode == UMAX
          || subcode == SMIN || subcode == SMAX)
        *total = COSTS_N_INSNS (3);

      /* This should have been handled by the CPU specific routines.  */

      if (arm_arch3m && mode == SImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
              == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
        *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);

      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            *total = COSTS_N_INSNS (1);

          *total = COSTS_N_INSNS (2);
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
        {
          subcode = GET_CODE (XEXP (x, 0));
          if (subcode == ASHIFT || subcode == ASHIFTRT
              || subcode == LSHIFTRT
              || subcode == ROTATE || subcode == ROTATERT
              || (subcode == MULT
                  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
            {
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
              /* Register shifts cost an extra cycle.  */
              if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
                *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
                                                        subcode, 1, speed);
            }
        }

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        *total = COSTS_N_INSNS (4);

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
             || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
            && REG_P (XEXP (operand, 0))
            && REGNO (XEXP (operand, 0)) == CC_REGNUM))
        *total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
                 + rtx_cost (XEXP (x, 2), code, 2, speed));

      if (mode == SImode && XEXP (x, 1) == const0_rtx)
        *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);

      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);

      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);

      /* SCC insns.  In the case where the comparison has already been
         performed, then they cost 2 instructions.  Otherwise they need
         an additional comparison before them.  */
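      /* For illustration (not from the original sources): when the flags
         are already valid, a store-flag sequence is typically
         MOV Rd, #0 followed by a conditional MOVcc Rd, #1 -- two insns;
         otherwise a compare has to be issued first, which is costed
         separately below.  */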
      *total = COSTS_N_INSNS (2);
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)

      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
        }

      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
          || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
        *total += rtx_cost (XEXP (x, 1), code, 1, speed);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            *total = COSTS_N_INSNS (1);

          *total = COSTS_N_INSNS (20);
        }

      *total = COSTS_N_INSNS (1);

      *total += COSTS_N_INSNS (3);

      if (GET_MODE_CLASS (mode) == MODE_INT)
        {
          rtx op = XEXP (x, 0);
          enum machine_mode opmode = GET_MODE (op);

          *total += COSTS_N_INSNS (1);

          if (opmode != SImode)
            {
              /* If !arm_arch4, we use one of the extendhisi2_mem
                 or movhi_bytes patterns for HImode.  For a QImode
                 sign extension, we first zero-extend from memory
                 and then perform a shift sequence.  */
              if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
                *total += COSTS_N_INSNS (2);

              *total += COSTS_N_INSNS (1);

              /* We don't have the necessary insn, so we need to perform some
                 other operation.  */
              else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
                /* An and with constant 255.  */
                *total += COSTS_N_INSNS (1);

              /* A shift sequence.  Increase costs slightly to avoid
                 combining two shifts into an extend operation.  */
              *total += COSTS_N_INSNS (2) + 1;
            }
        }

      switch (GET_MODE (XEXP (x, 0)))
        {
          *total = COSTS_N_INSNS (1);
        }

      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);

      if (const_ok_for_arm (INTVAL (x))
          || const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
                                                  INTVAL (x), NULL_RTX,
                                                  NULL_RTX, 0, 0));

      *total = COSTS_N_INSNS (3);

      *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);

      /* The vec_extract patterns accept memory operands that require an
         address reload.  Account for the cost of that reload to give the
         auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
          && GET_CODE (SET_SRC (x)) == VEC_SELECT)
        {
          *total = rtx_cost (SET_DEST (x), code, 0, speed);
          if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
            *total += COSTS_N_INSNS (1);
        }

      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
          && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
          && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
        {
          rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
          *total = rtx_cost (mem, code, 0, speed);
          if (!neon_vector_mem_operand (mem, 2, true))
            *total += COSTS_N_INSNS (1);
        }

      /* We cost this as high as our memory costs to allow this to
         be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
        {
          *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
        }

      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);

      *total = COSTS_N_INSNS (4);

/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);

      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
         defined by RTL expansion, especially for the expansion of
         multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
           && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
          || (GET_CODE (XEXP (x, 1)) == MULT
              && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
        return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */

      return COSTS_N_INSNS (1);

      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* Thumb1 mul instruction can't operate on const.  We must load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

      if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
        return COSTS_N_INSNS (1);
      /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
      if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
        return COSTS_N_INSNS (2);
      /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
      if (thumb_shiftable_const (INTVAL (x)))
        return COSTS_N_INSNS (2);
      return COSTS_N_INSNS (3);

      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)

      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)

      return COSTS_N_INSNS (2);

      return COSTS_N_INSNS (3);

      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
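      /* Illustrative arithmetic for the formula below: with 4-byte words,
         a DImode access is estimated at 10 + 4 * ((8 - 1) / 4) = 14.  */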
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)

      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
          return (1 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

          return (4 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

          return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
        }

/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                    int *total)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_size_rtx_costs (x, code, outer_code);

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */

      /* A memory access costs 1 insn if the mode is small, or the address is
         a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
        *total = COSTS_N_INSNS (1);
      else if (flag_pic
               && GET_CODE (XEXP (x, 0)) == PLUS
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
        /* This will be split into two instructions.
           See arm.md:calculate_pic_address.  */
        *total = COSTS_N_INSNS (2);
      else
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);

      if (mode == SImode && REG_P (XEXP (x, 1)))
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
        }

      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
          *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
        }
      else if (mode == SImode)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
          /* Slightly disparage register shifts, but not by much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
        }

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
        }

        {
          enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
          enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

          if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
              || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
              || subcode1 == ROTATE || subcode1 == ROTATERT
              || subcode1 == ASHIFT || subcode1 == LSHIFTRT
              || subcode1 == ASHIFTRT)
            {
              /* It's just the cost of the two operands.  */
            }

          *total = COSTS_N_INSNS (1);
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
        }

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
          *total += rtx_cost (XEXP (x, 1), code, 1, false);
        }

    case AND: case XOR: case IOR:
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));

          if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
              || subcode == LSHIFTRT || subcode == ASHIFTRT
              || (code == AND && subcode == NOT))
            {
              /* It's just the cost of the two operands.  */
            }
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (cc_register (XEXP (x, 0), VOIDmode))

      *total = COSTS_N_INSNS (1);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));

      return arm_rtx_costs_1 (x, outer_code, total, 0);

      if (const_ok_for_arm (INTVAL (x)))
        /* A multiplication by a constant requires another instruction
           to load the constant to a register.  */
        *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
                                ? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
        {
          if (outer_code == COMPARE || outer_code == PLUS
              || outer_code == MINUS)

          *total = COSTS_N_INSNS (1);
        }
      else
        *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (4);

      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && outer_code == SET
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);

      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
         cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;

      if (mode != VOIDmode)
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *total = COSTS_N_INSNS (4); /* Who knows?  */

/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
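/* Illustrative examples (not from the original sources): for
   (mult (reg) (const_int 8)) this returns the register, since multiplying
   by 8 == 1 << 3 can be encoded as a left shift by 3; for
   (ashift (reg A) (reg B)) it returns A and sets *SHIFT_REG to B.  */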
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}

static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  gcc_assert (GET_CODE (x) == UNSPEC);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
         use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        {
          *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
                    + extra_cost->ldst.load_unaligned);

          *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                     ADDR_SPACE_GENERIC, speed_p);
        }

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
                  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);

      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);

      *cost = COSTS_N_INSNS (1);
      if (speed_p)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      *cost = COSTS_N_INSNS (2);

/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
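/* For instance, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for size
   (illustrative arithmetic from the definition above).  */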
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
          shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
          if (shift_op != NULL \
              && arm_rtx_shift_left_p (XEXP (x, IDX))) \
\
                  *cost += extra_cost->alu.arith_shift_reg; \
                  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
\
                *cost += extra_cost->alu.arith_shift; \
\
              *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
                        + rtx_cost (XEXP (x, 1 - IDX), \
                                    OP, 1, speed_p)); \

/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                   const struct cpu_cost_table *extra_cost,
                   int *cost, bool speed_p)
{
  enum machine_mode mode = GET_MODE (x);

      *cost = thumb1_rtx_costs (x, code, outer_code);

      *cost = thumb1_size_rtx_costs (x, code, outer_code);

      if (REG_P (SET_SRC (x))
          && REG_P (SET_DEST (x)))
        {
          /* Assume that most copies can be done with a single insn,
             unless we don't have HW FP, in which case everything
             larger than word mode will require two insns.  */
          *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
                                   && GET_MODE_SIZE (mode) > 4)
                                  ? 2 : 1));
          /* Conditional register moves can be encoded
             in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)

        }

      if (CONST_INT_P (SET_SRC (x)))
        {
          /* Handle CONST_INT here, since the value doesn't have a mode
             and we would otherwise be unable to work out the true cost.  */
          *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
          mode = GET_MODE (SET_DEST (x));

          /* Slightly lower the cost of setting a core reg to a constant.
             This helps break up chains and allows for better scheduling.  */
          if (REG_P (SET_DEST (x))
              && REGNO (SET_DEST (x)) <= LR_REGNUM)

          /* Immediate moves with an immediate in the range [0, 255] can be
             encoded in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
              && INTVAL (x) >= 0 && INTVAL (x) <= 255)

          goto const_int_cost;
        }

      /* A memory access costs 1 insn if the mode is small, or the address is
         a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
        *cost = COSTS_N_INSNS (1);
      else if (flag_pic
               && GET_CODE (XEXP (x, 0)) == PLUS
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
        /* This will be split into two instructions.
           See arm.md:calculate_pic_address.  */
        *cost = COSTS_N_INSNS (2);
      else
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
         accessing memory.  */
      if (speed_p)
        *cost += (extra_cost->ldst.load
                  + arm_address_cost (XEXP (x, 0), mode,
                                      ADDR_SPACE_GENERIC, speed_p));

        *cost += extra_cost->ldst.load;

      /* Calculations of LDM costs are complex.  We assume an initial cost
         (ldm_1st) which will load the number of registers mentioned in
         ldm_regs_per_insn_1st registers; then each additional
         ldm_regs_per_insn_subsequent registers cost one more insn.  The
         formula for N regs is thus:

         ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
                                   + ldm_regs_per_insn_subsequent - 1)
                                  / ldm_regs_per_insn_subsequent).

         Additional costs may also be added for addressing.  A similar
         formula is used for STM.  */
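      /* Worked example (illustrative): with N == 5,
         ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
         this gives ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2)
         == ldm_1st + COSTS_N_INSNS (2).  */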
        bool is_ldm = load_multiple_operation (x, SImode);
        bool is_stm = store_multiple_operation (x, SImode);

        *cost = COSTS_N_INSNS (1);

        if (is_ldm || is_stm)
          {
            HOST_WIDE_INT nregs = XVECLEN (x, 0);
            HOST_WIDE_INT regs_per_insn_1st = is_ldm
              ? extra_cost->ldst.ldm_regs_per_insn_1st
              : extra_cost->ldst.stm_regs_per_insn_1st;
            HOST_WIDE_INT regs_per_insn_sub = is_ldm
              ? extra_cost->ldst.ldm_regs_per_insn_subsequent
              : extra_cost->ldst.stm_regs_per_insn_subsequent;

            *cost += regs_per_insn_1st
              + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
                                + regs_per_insn_sub - 1)
                               / regs_per_insn_sub);
          }

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *cost = COSTS_N_INSNS (speed_p
                               ? extra_cost->fp[mode != SFmode].div : 1);
      else if (mode == SImode && TARGET_IDIV)
        *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
      else
        *cost = LIBCALL_COST (2);
      return false;  /* All arguments must be in registers.  */

      *cost = LIBCALL_COST (2);
      return false;  /* All arguments must be in registers.  */

      if (mode == SImode && REG_P (XEXP (x, 1)))
        {
          *cost = (COSTS_N_INSNS (2)
                   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
          if (speed_p)
            *cost += extra_cost->alu.shift_reg;
        }

      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
          *cost = (COSTS_N_INSNS (3)
                   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }
      else if (mode == SImode)
        {
          *cost = (COSTS_N_INSNS (1)
                   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
          /* Slightly disparage register shifts at -Os, but not by much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
        }
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_SIZE (mode) < 4)
        {
          *cost = (COSTS_N_INSNS (1)
                   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
          /* Slightly disparage register shifts at -Os, but not by
             much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT)
        {
          if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
            {
              /* Can use SBFX/UBFX.  */
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->alu.bfx;
              *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
            }
          else
            {
              *cost = COSTS_N_INSNS (2);
              *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
              if (speed_p)
                {
                  if (CONST_INT_P (XEXP (x, 1)))
                    *cost += 2 * extra_cost->alu.shift;
                  else
                    *cost += (extra_cost->alu.shift
                              + extra_cost->alu.shift_reg);
                }
              else
                /* Slightly disparage register shifts.  */
                *cost += !CONST_INT_P (XEXP (x, 1));
            }
        }
      else
        {
          *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            {
              if (CONST_INT_P (XEXP (x, 1)))
                *cost += (2 * extra_cost->alu.shift
                          + extra_cost->alu.log_shift);
              else
                *cost += (extra_cost->alu.shift
                          + extra_cost->alu.shift_reg
                          + extra_cost->alu.log_shift_reg);
            }
        }

      *cost = LIBCALL_COST (2);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (GET_CODE (XEXP (x, 0)) == MULT
              || GET_CODE (XEXP (x, 1)) == MULT)
            {
              rtx mul_op0, mul_op1, sub_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              if (GET_CODE (XEXP (x, 0)) == MULT)
                {
                  mul_op0 = XEXP (XEXP (x, 0), 0);
                  mul_op1 = XEXP (XEXP (x, 0), 1);
                  sub_op = XEXP (x, 1);
                }
              else
                {
                  mul_op0 = XEXP (XEXP (x, 1), 0);
                  mul_op1 = XEXP (XEXP (x, 1), 1);
                  sub_op = XEXP (x, 0);
                }

              /* The first operand of the multiply may be optionally
                 inverted.  */
              if (GET_CODE (mul_op0) == NEG)
                mul_op0 = XEXP (mul_op0, 0);

              *cost += (rtx_cost (mul_op0, code, 0, speed_p)
                        + rtx_cost (mul_op1, code, 0, speed_p)
                        + rtx_cost (sub_op, code, 0, speed_p));
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
        }

        rtx shift_by_reg = NULL;

        *cost = COSTS_N_INSNS (1);

        shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
        if (shift_op == NULL)
          {
            shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
            non_shift_op = XEXP (x, 0);
          }
        else
          non_shift_op = XEXP (x, 1);

        if (shift_op != NULL)
          {
            if (shift_by_reg != NULL)
              {
                if (speed_p)
                  *cost += extra_cost->alu.arith_shift_reg;
                *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
              }
            else if (speed_p)
              *cost += extra_cost->alu.arith_shift;

            *cost += (rtx_cost (shift_op, code, 0, speed_p)
                      + rtx_cost (non_shift_op, code, 0, speed_p));
          }

      if (arm_arch_thumb2
          && GET_CODE (XEXP (x, 1)) == MULT)
        {
          if (speed_p)
            *cost += extra_cost->mult[0].add;
          *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
        }

      if (CONST_INT_P (XEXP (x, 0)))
        {
          int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
                                        INTVAL (XEXP (x, 0)), NULL_RTX,
                                        NULL_RTX, 1, 0);
          *cost = COSTS_N_INSNS (insns);
          if (speed_p)
            *cost += insns * extra_cost->alu.arith;
          *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
        }

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;

          /* We check both sides of the MINUS for shifter operands since,
             unlike PLUS, it's not commutative.  */
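          /* That is, both (minus (ashift ...) (reg)) and
             (minus (reg) (ashift ...)) have to be considered, which is why
             the macro is instantiated below for both IDX 0 and IDX 1.  */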
          HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
          HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

          /* Slightly disparage, as we might need to widen the result.  */
          *cost = 1 + COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;

          if (CONST_INT_P (XEXP (x, 0)))
            {
              *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
            }

      *cost = COSTS_N_INSNS (2);

      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
        {
          rtx op1 = XEXP (x, 1);

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;

          if (GET_CODE (op1) == ZERO_EXTEND)
            *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
          else
            *cost += rtx_cost (op1, MINUS, 1, speed_p);
          *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
                             0, speed_p);
        }
      else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        {
          if (speed_p)
            *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
          *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
                              0, speed_p)
                    + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
        }
      else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
               || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
        {
          if (speed_p)
            *cost += (extra_cost->alu.arith
                      + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                         ? extra_cost->alu.arith
                         : extra_cost->alu.arith_shift));
          *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (x, 1), 0),
                                GET_CODE (XEXP (x, 1)), 0, speed_p));
        }

      if (speed_p)
        *cost += 2 * extra_cost->alu.arith;

      *cost = LIBCALL_COST (2);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              rtx mul_op0, mul_op1, add_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              mul_op0 = XEXP (XEXP (x, 0), 0);
              mul_op1 = XEXP (XEXP (x, 0), 1);
              add_op = XEXP (x, 1);

              *cost += (rtx_cost (mul_op0, code, 0, speed_p)
                        + rtx_cost (mul_op1, code, 0, speed_p)
                        + rtx_cost (add_op, code, 0, speed_p));
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
        }

      /* Narrow modes can be synthesized in SImode, but the range
         of useful sub-operations is limited.  Check for shift operations
         on one of the operands.  Only left shifts can be used in the
         narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;

          HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              /* Slightly penalize a narrow operation as the result may
                 need widening.  */
              *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
            }

          /* Slightly penalize a narrow operation as the result may
             need widening.  */
          *cost = 1 + COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
        }

        rtx shift_op, shift_reg;

        *cost = COSTS_N_INSNS (1);

        if (TARGET_INT_SIMD
            && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
          {
            /* UXTA[BH] or SXTA[BH].  */
            if (speed_p)
              *cost += extra_cost->alu.extnd_arith;
            *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
                                speed_p)
                      + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
          }

        shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
        if (shift_op != NULL)
          {
            if (shift_reg)
              {
                if (speed_p)
                  *cost += extra_cost->alu.arith_shift_reg;
                *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
              }
            else if (speed_p)
              *cost += extra_cost->alu.arith_shift;

            *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
                      + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
          }

        if (GET_CODE (XEXP (x, 0)) == MULT)
          {
            rtx mul_op = XEXP (x, 0);

            *cost = COSTS_N_INSNS (1);

            if (TARGET_DSP_MULTIPLY
                && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
                     && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                         || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                             && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                             && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
                    || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
                        && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
                        && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
                        && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                            || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                                && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                                && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
                                    == 16))))))
              {
                if (speed_p)
                  *cost += extra_cost->mult[0].extend_add;
                *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
                                    SIGN_EXTEND, 0, speed_p)
                          + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
                                      SIGN_EXTEND, 0, speed_p)
                          + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
              }

            if (speed_p)
              *cost += extra_cost->mult[0].add;
            *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
                      + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
                      + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
          }

        if (CONST_INT_P (XEXP (x, 1)))
          {
            int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                          INTVAL (XEXP (x, 1)), NULL_RTX,
                                          NULL_RTX, 1, 0);
            *cost = COSTS_N_INSNS (insns);
            if (speed_p)
              *cost += insns * extra_cost->alu.arith;
            *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
          }

      if (arm_arch3m
          && GET_CODE (XEXP (x, 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->mult[1].extend_add;
          *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                              ZERO_EXTEND, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
                                ZERO_EXTEND, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
        }

      *cost = COSTS_N_INSNS (2);

      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
          || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        {
          if (speed_p)
            *cost += (extra_cost->alu.arith
                      + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                         ? extra_cost->alu.arith
                         : extra_cost->alu.arith_shift));

          *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
                              speed_p)
                    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
        }

      if (speed_p)
        *cost += 2 * extra_cost->alu.arith;

      *cost = LIBCALL_COST (2);
    case AND: case XOR: case IOR:
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));
          rtx op0 = XEXP (x, 0);
          rtx shift_op, shift_reg;

          *cost = COSTS_N_INSNS (1);

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          shift_op = shifter_op_p (op0, &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;

              *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), code, 1, speed_p));
            }

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (code, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.logical;
              *cost += rtx_cost (op0, code, 0, speed_p);
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          *cost += (rtx_cost (op0, code, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), code, 1, speed_p));
        }

        {
          rtx op0 = XEXP (x, 0);
          enum rtx_code subcode = GET_CODE (op0);

          *cost = COSTS_N_INSNS (2);

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          if (GET_CODE (op0) == ZERO_EXTEND)
            {
              if (speed_p)
                *cost += 2 * extra_cost->alu.logical;

              *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), code, 0, speed_p));
            }
          else if (GET_CODE (op0) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

              *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), code, 0, speed_p));
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical;
        }

      *cost = LIBCALL_COST (2);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          rtx op0 = XEXP (x, 0);

          *cost = COSTS_N_INSNS (1);

          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].mult;

          *cost += (rtx_cost (op0, MULT, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
        }

      *cost = COSTS_N_INSNS (1);
      if (TARGET_DSP_MULTIPLY
          && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                   || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                       && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                       && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
              || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
                  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
                  && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
                  && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                      || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                          && (INTVAL (XEXP (XEXP (x, 1), 1))
                              == 16))))))
        {
          if (speed_p)
            *cost += extra_cost->mult[0].extend;
          *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
        }

      if (speed_p)
        *cost += extra_cost->mult[0].simple;

      if (arm_arch3m
          && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->mult[1].extend;
          *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
                              ZERO_EXTEND, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (x, 1), 0),
                                ZERO_EXTEND, 0, speed_p));
        }

      *cost = LIBCALL_COST (2);

      *cost = LIBCALL_COST (2);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
        }

      if (GET_CODE (XEXP (x, 0)) == ABS)
        {
          *cost = COSTS_N_INSNS (2);
          /* Assume the non-flag-changing variant.  */
          if (speed_p)
            *cost += (extra_cost->alu.log_shift
                      + extra_cost->alu.arith_shift);
          *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
        {
          *cost = COSTS_N_INSNS (2);
          /* No extra cost for MOV imm and MVN imm.  */
          /* If the comparison op is using the flags, there's no further
             cost, otherwise we need to add the cost of the comparison.  */
          if (!(REG_P (XEXP (XEXP (x, 0), 0))
                && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
                && XEXP (XEXP (x, 0), 1) == const0_rtx))
            {
              *cost += (COSTS_N_INSNS (1)
                        + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
                                    speed_p)
                        + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
                                    speed_p));
              if (speed_p)
                *cost += extra_cost->alu.arith;
            }
        }

      *cost = COSTS_N_INSNS (1);
      if (speed_p)
        *cost += extra_cost->alu.arith;

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          /* Slightly disparage, as we might need an extend operation.  */
          *cost = 1 + COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
        }

      *cost = COSTS_N_INSNS (2);
      if (speed_p)
        *cost += 2 * extra_cost->alu.arith;

      *cost = LIBCALL_COST (1);
        rtx shift_reg = NULL;

        *cost = COSTS_N_INSNS (1);

        shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

        if (shift_reg != NULL)
          {
            if (speed_p)
              *cost += extra_cost->alu.log_shift_reg;
            *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
          }
        else if (speed_p)
          *cost += extra_cost->alu.log_shift;
        *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);

        if (speed_p)
          *cost += extra_cost->alu.logical;

      *cost = COSTS_N_INSNS (2);

      *cost += LIBCALL_COST (1);

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        *cost = COSTS_N_INSNS (4);
      else
        {
          int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
          int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);

          *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
          /* Assume that if one arm of the if_then_else is a register,
             that it will be tied with the result and eliminate the
             conditional insn.  */
          if (REG_P (XEXP (x, 1)))

          else if (REG_P (XEXP (x, 2)))

          else
            {
              if (extra_cost->alu.non_exec_costs_exec)
                *cost += op1cost + op2cost + extra_cost->alu.non_exec;
              else
                *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;

              *cost += op1cost + op2cost;
            }
        }

      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)

        {
          enum machine_mode op0mode;
          /* We'll mostly assume that the cost of a compare is the cost of the
             LHS.  However, there are some notable exceptions.  */

          /* Floating point compares are never done as side-effects.  */
          op0mode = GET_MODE (XEXP (x, 0));
          if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
              && (op0mode == SFmode || !TARGET_VFP_SINGLE))
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[op0mode != SFmode].compare;

              if (XEXP (x, 1) == CONST0_RTX (op0mode))
                {
                  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
                }
            }
          else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
            {
              *cost = LIBCALL_COST (2);
            }

          /* DImode compares normally take two insns.  */
          if (op0mode == DImode)
            {
              *cost = COSTS_N_INSNS (2);
              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;
            }

          if (op0mode == SImode)
            {
              if (XEXP (x, 1) == const0_rtx
                  && !(REG_P (XEXP (x, 0))
                       || (GET_CODE (XEXP (x, 0)) == SUBREG
                           && REG_P (SUBREG_REG (XEXP (x, 0))))))
                {
                  *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);

                  /* Multiply operations that set the flags are often
                     significantly more expensive.  */
                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;

                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == PLUS
                      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
                                                           0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;
                }

              shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
              if (shift_op != NULL)
                {
                  *cost = COSTS_N_INSNS (1);
                  if (shift_reg != NULL)
                    {
                      *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
                      if (speed_p)
                        *cost += extra_cost->alu.arith_shift_reg;
                    }
                  else if (speed_p)
                    *cost += extra_cost->alu.arith_shift;
                  *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
                            + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
                }

              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->alu.arith;
              if (CONST_INT_P (XEXP (x, 1))
                  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
                {
                  *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
                }
            }

          *cost = LIBCALL_COST (2);
        }

      if (outer_code == SET)
        {
          /* Is it a store-flag operation?  */
          if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
              && XEXP (x, 1) == const0_rtx)
            {
              /* Thumb also needs an IT insn.  */
              *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
            }
          if (XEXP (x, 1) == const0_rtx)
            {
              /* LSR Rd, Rn, #31.  */
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->alu.shift;

              *cost = COSTS_N_INSNS (2);

              /* RSBS T1, Rn, Rn, LSR #31.  */
              *cost = COSTS_N_INSNS (2);
              if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              /* RSB  Rd, Rn, Rn, ASR #1
                 LSR  Rd, Rd, #31.  */
              *cost = COSTS_N_INSNS (2);
              if (speed_p)
                *cost += (extra_cost->alu.arith_shift
                          + extra_cost->alu.shift);

              *cost = COSTS_N_INSNS (2);
              if (speed_p)
                *cost += extra_cost->alu.shift;

              /* Remaining cases are either meaningless or would take
                 three insns anyway.  */
              *cost = COSTS_N_INSNS (3);

              *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
            }

          *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
          if (CONST_INT_P (XEXP (x, 1))
              && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
            {
              *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
            }
        }
      /* Not directly inside a set.  If it involves the condition code
         register it must be the condition for a branch, cond_exec or
         I_T_E operation.  Since the comparison is performed elsewhere
         this is just the control part which has no additional
         cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
               && XEXP (x, 1) == const0_rtx)

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
        }

      if (mode == SImode)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
        }

      *cost = LIBCALL_COST (1);

      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
          && MEM_P (XEXP (x, 0)))
        {
          *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);

          if (speed_p)
            {
              if (GET_MODE (XEXP (x, 0)) == SImode)
                *cost += extra_cost->ldst.load;
              else
                *cost += extra_cost->ldst.load_sign_extend;

              if (mode == DImode)
                *cost += extra_cost->alu.shift;
            }
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have SXTB/SXTH.  */
          *cost = COSTS_N_INSNS (1);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extnd;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  */
          *cost = COSTS_N_INSNS (2);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.shift;
        }

      if ((arm_arch4
           || GET_MODE (XEXP (x, 0)) == SImode
           || GET_MODE (XEXP (x, 0)) == QImode)
          && MEM_P (XEXP (x, 0)))
        {
          *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
        {
          /* UXTB can be a shorter instruction in Thumb2, but it might
             be slower than the AND Rd, Rn, #255 alternative.  When
             optimizing for speed it should never be slower to use
             AND, and we don't really model 16-bit vs 32-bit insns
             here.  */
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.logical;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have UXTB/UXTH.  */
          *cost = COSTS_N_INSNS (1);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extnd;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  It's marginally preferable to use
             shifts rather than two BIC instructions as the second
             shift may merge with a subsequent insn as a shifter
             op.  */
          *cost = COSTS_N_INSNS (2);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }
      else  /* GET_MODE (XEXP (x, 0)) == SImode.  */
        *cost = COSTS_N_INSNS (1);

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
        }

      /* CONST_INT has no mode, so we cannot tell for sure how many
         insns are really going to be needed.  The best we can do is
         look at the value passed.  If it fits in SImode, then assume
         that's the mode it will be used for.  Otherwise assume it
         will be used in DImode.  */
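      /* For example (illustrative): a value such as
         ((HOST_WIDE_INT) 1 << 32) becomes 0 under
         trunc_int_for_mode (..., SImode), failing the test below, so it
         is costed as a DImode constant built from two 32-bit halves.  */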
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
        {
          /* Avoid blowing up in arm_gen_constant ().  */
          if (!(outer_code == PLUS
                || outer_code == AND
                || outer_code == IOR
                || outer_code == XOR
                || outer_code == MINUS))

          if (mode == SImode)
            {
              *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
                                                        INTVAL (x), NULL, NULL,
                                                        0, 0));
            }
          else
            *cost += COSTS_N_INSNS (arm_gen_constant
                                    (outer_code, SImode, NULL,
                                     trunc_int_for_mode (INTVAL (x), SImode),
                                     NULL, NULL, 0, 0)
                                    + arm_gen_constant (outer_code, SImode, NULL,
                                                        INTVAL (x) >> 32, NULL,
                                                        NULL, 0, 0));
        }

      if (arm_arch_thumb2 && !flag_pic)
        *cost = COSTS_N_INSNS (2);
      else if (speed_p)
        *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
      else
        *cost = COSTS_N_INSNS (2);

      *cost += COSTS_N_INSNS (1);
      if (speed_p)
        *cost += extra_cost->alu.arith;

      *cost = COSTS_N_INSNS (4);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (vfp3_const_double_rtx (x))
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].fpconst;
            }

          if (speed_p)
            {
              *cost = COSTS_N_INSNS (1);
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            *cost = COSTS_N_INSNS (2 + (mode == DFmode));
        }

      *cost = COSTS_N_INSNS (4);

      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *cost = COSTS_N_INSNS (1);
      else
        *cost = COSTS_N_INSNS (4);

      *cost = COSTS_N_INSNS (1);
      /* When optimizing for size, we prefer constant pool entries to
         MOVW/MOVT pairs, so bump the cost of these slightly.  */

      *cost = COSTS_N_INSNS (1);
      if (speed_p)
        *cost += extra_cost->alu.clz;

      if (XEXP (x, 1) == const0_rtx)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.log_shift;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
        }
      /* Fall through.  */

      *cost = COSTS_N_INSNS (2);

      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
                      == ZERO_EXTEND))))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->mult[1].extend;
          *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
                              speed_p)
                    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
                                0, speed_p));
        }

      *cost = LIBCALL_COST (1);

      return arm_unspec_cost (x, outer_code, speed_p, cost);

      /* Reading the PC is like reading any other register.  Writing it
         is more expensive, but we take that into account elsewhere.  */

      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */

      if (arm_arch_thumb2
          && CONST_INT_P (XEXP (x, 1))
          && CONST_INT_P (XEXP (x, 2)))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.bfx;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
        }

      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost = COSTS_N_INSNS (2);
      if (speed_p)
        *cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);

      if (TARGET_HARD_FLOAT)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].widen;
          if (!TARGET_FPU_ARMV8
              && GET_MODE (XEXP (x, 0)) == HFmode)
            {
              /* Pre v8, widening HF->DF is a two-step process, first
                 widening to SFmode.  */
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[0].widen;
            }
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
        }

      *cost = LIBCALL_COST (1);

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].narrow;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);

          /* Vector modes?  */
        }
      *cost = LIBCALL_COST (1);

      if (TARGET_HARD_FLOAT)
        {
          if (GET_MODE_CLASS (mode) == MODE_INT)
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
              /* Strip off the 'cost' of rounding towards zero.  */
              if (GET_CODE (XEXP (x, 0)) == FIX)
                *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
              else
                *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
              /* ??? Increase the cost to deal with transferring from
                 FP -> CORE registers?  */
            }
          else if (GET_MODE_CLASS (mode) == MODE_FLOAT
                   && TARGET_FPU_ARMV8)
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].roundint;
            }
          /* Vector costs?  */
        }
      *cost = LIBCALL_COST (1);

    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
        {
          /* ??? Increase the cost to deal with transferring from CORE
             -> FP registers?  */
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fromint;
        }
      *cost = LIBCALL_COST (1);

      *cost = COSTS_N_INSNS (1);

      /* Just a guess.  Cost one insn per input.  */
      *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));

      if (mode != VOIDmode)
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *cost = COSTS_N_INSNS (4); /* Who knows?  */

#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs when optimizing for size.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
               int *total, bool speed)
{
  bool result;

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
    {
      /* Old way.  (Deprecated.)  */
      if (!speed)
        result = arm_size_rtx_costs (x, (enum rtx_code) code,
                                     (enum rtx_code) outer_code, total);
      else
        result = current_tune->rtx_costs (x, (enum rtx_code) code,
                                          (enum rtx_code) outer_code, total,
                                          speed);
    }
  else
    {
      if (current_tune->insn_extra_cost)
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    current_tune->insn_extra_cost,
                                    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
         && current_tune->insn_extra_cost != NULL  */
      else
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    &generic_extra_costs, total, speed);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
    }

  return result;
}

/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */
static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;
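          /* The loop below estimates the number of Booth steps needed for
             the constant: each iteration consumes booth_unit_size bits
             of I.  For example (illustrative), a constant with 8
             significant bits needs 4 iterations at booth_unit_size == 2.  */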
          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);

/* RTX cost for cores with a fast multiply unit (M variants).  */
static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

  /* ??? should thumb2 use different costs?  */

      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
            }

          *total = COSTS_N_INSNS (cost);
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
            }
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);

/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */
static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);

      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
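          /* Consequence (illustrative): a constant whose significant bits
             all fit below bit 15 (e.g. 0x7fff) retires in the first cycle,
             while larger magnitudes also pay for the second cycle -- see
             the masked_const tests below.  */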
10894 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
10895 /* There's a general overhead of one cycle. */
10897 unsigned HOST_WIDE_INT masked_const
;
10899 if (i
& 0x80000000)
10902 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
10904 masked_const
= i
& 0xffff8000;
10905 if (masked_const
!= 0)
10908 masked_const
= i
& 0xf8000000;
10909 if (masked_const
!= 0)
10912 *total
= COSTS_N_INSNS (cost
);
10916 if (mode
== SImode
)
10918 *total
= COSTS_N_INSNS (3);
10922 /* Requires a lib call */
10923 *total
= COSTS_N_INSNS (20);
10927 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
	{
	case MULT:
	  *total = COSTS_N_INSNS (3);
	  return true;

	default:
	  *total = thumb1_rtx_costs (x, code, outer_code);
	  return true;
	}
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (5);
	  return false;
	}

      if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints ();
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     T0 = T0 <op> T1
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && REG_P (SET_DEST (PATTERN (dep)))
		      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
						  SET_DEST (PATTERN (dep))))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 3;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_REG:
    case TYPE_ALUS_REG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_ADR:
    case TYPE_BFM:
    case TYPE_REV:
    case TYPE_MVN_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD1:
    case TYPE_STORE1:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FMOV:
    case TYPE_F_CVT:
    case TYPE_F_CVTF2I:
    case TYPE_F_CVTI2F:
    case TYPE_F_MRC:
    case TYPE_F_MRRC:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}

/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_EXTEND:
    case TYPE_MVN_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
    case TYPE_BRANCH:
    case TYPE_CALL:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
			int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock,
	     *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID(ready [first_older_only]),
	     INSN_UID(ready [first_younger]));
  rtx first_older_only_insn = ready [first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i+1];
    }

  ready[i] = first_older_only_insn;
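/* Added illustration (not in the upstream source): with *n_readyp == 3 and
   ready[] == {A, B, C} (the insn at the highest index issues first), suppose
   C is "younger" and A is "older only".  The downward scan finds
   first_younger == 2 and first_older_only == 0, and the rotation yields
   {B, C, A}: the older-only insn A now issues first while the relative
   order of B and C is preserved.  */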
}

/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  HOST_WIDE_INT m1, m2;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);
  mantissa = m1;
  mant_hi = m2;

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
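/* Worked example (added here for illustration, not from the upstream source):
   1.0 = 16 * 2^-4, i.e. n == 16 and r == 4.  REAL_EXP for 1.0 is 1 (GCC keeps
   significands in [0.5, 1)), so exponent = 5 - 1 = 4 and mantissa == 16 after
   the shifts above.  The returned index is
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, the standard VFP imm8
   encoding of 1.0.  */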
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.
*/
static int
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
	mode = DImode;
      innersize = GET_MODE_SIZE (mode);
    }
  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
	{
	  rtx elt = CONST_VECTOR_ELT (op, i);
	  REAL_VALUE_TYPE re;

	  REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

	  if (!REAL_VALUES_EQUAL (r0, re))
	    return -1;
	}

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }
  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (CONST_DOUBLE_P (el))
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (CONST_DOUBLE_P (el))
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;
  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
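/* Added example (not from the upstream source): the V4SI constant with every
   element 0x4100 splats to the byte pattern {00, 41, 00, 00, ...}.  It fails
   variant 0 (bytes[i + 1] must be 0) but matches variant 1, so the caller can
   emit "vmov.i32" of 0x4100 with *elementwidth set to 32.  */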
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for a description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT says whether the shift is a left or a right
   shift, because the two have different limits on the immediate.  */

int
neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
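/* Added example (not from the upstream source): for a V8HI shift-count vector
   whose elements are all 5, innersize == 2 so maxshift == 16; 5 is accepted
   for both VSHL (valid range 0..15) and VSHR (valid range 1..16), and
   *elementwidth is set to 16.  */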
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     enum machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  enum machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
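/* Added example (not from the upstream source): for the V2SI constant {7, 7},
   all elements match and GET_MODE_SIZE (SImode) <= 4, so the constant 7 is
   moved into a core register and a (vec_duplicate:V2SI reg) is returned,
   which the vdup patterns then match.  */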
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
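/* Added example (not from the upstream source): initializing a V4SI vector
   {1, 2, x, 4} (n_var == 1, one_var == 2) first materializes the constant
   {1, 2, 4, 4} via the recursive call (lane 2 borrows its neighbor's value),
   then overwrites lane 2 with x through gen_neon_vset_lanev4si.  */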
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (enum machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return !strict;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* FIXME: vld1 allows register post-modify.  */

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
12696 label_mentioned_p (rtx x
)
12701 if (GET_CODE (x
) == LABEL_REF
)
12704 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12705 instruction, but they are constant offsets, not symbols. */
12706 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12709 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12710 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12716 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12717 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12720 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
/* Return TRUE if X contains any TLS symbol references.  */
bool
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return true;

    default:
      return false;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */

static int
arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == UNSPEC
      && (XINT (*x, 1) == UNSPEC_PIC_BASE
	  || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
    return 1;

  return 0;
}

static bool
arm_cannot_copy_insn_p (rtx insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
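/* Added example (not from the upstream source): clamping to [-256, 255]
   gives exact_log2 (255 + 1) == 8, and the low bound -256 equals -255 - 1,
   so *mask == 9 and *signed_sat == true, matching "ssat rD, #9, rN".  A
   [0, 255] clamp instead sets *mask == 8 for "usat rD, #8, rN".  */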
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
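/* Added example (not from the upstream source): MEMs at [r4, #8] and
   [r4, #12] share base r4 with val_diff == 4, so they are adjacent; [r4]
   and [r5, #4] are not, because the base registers differ.  */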
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
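/* Added example (not from the upstream source): a two-register SImode pop
     (parallel [(set (reg sp) (plus (reg sp) (const_int 8)))
		(set (reg r4) (mem (reg sp)))
		(set (reg r5) (mem (plus (reg sp) (const_int 4))))])
   matches with LOAD true: the first element is the write-back (8 ==
   (count - 1) * 4), and the remaining SETs load ascending registers from
   consecutive offsets.  */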
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1-3
	  2		 2-6
	  3		 3-9
	  4		 4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
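/* Added example (not from the upstream source): for unsorted_offsets
   {8, 0, 4, 12} the caller presets order[0] = 1 (offset 0); the scan then
   fills order with {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12 in ascending
   order, and fails instead if any offset in the chain is missing or
   duplicated.  */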
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps
   insn numbers to an ascending order of memory locations.  If CHECK_REGS
   is true, the sequence of registers in REGS matches the loads from
   ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
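/* Illustrative classification (editorial example, not from the original
   source): for the pair

	 ldr	r4, [r0, #4]
	 ldr	r5, [r0, #8]

   the sorted offsets are {4, 8}, so on ARM this returns ldm_case 2 and
   the peephole can emit "ldmib r0, {r4, r5}"; offsets starting at 0
   would give ldm_case 1 (ldmia).  */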
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
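/* Illustrative classification (editorial example): the pair

	 str	r4, [r0, #-4]
	 str	r5, [r0]

   has sorted offsets {-4, 0} whose last element is 0, so on ARM this
   returns stm_case 3 and the stores become "stmda r0, {r4, r5}".  */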
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (VOIDmode, basereg,
                       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
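/* The PARALLEL built above has roughly this shape for COUNT == 2 with
   writeback (illustrative):

     (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
		(set (reg r4) (mem (reg rb)))
		(set (reg r5) (mem (plus (reg rb) (const_int 4))))])

   which the load-multiple patterns then recognize.  */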
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (VOIDmode, basereg,
                       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
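/* Typical use of these wrappers (illustrative, mirroring the block-move
   code below): load four words starting at the address in SRC while
   post-incrementing it:

     emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                       TRUE, srcbase, &srcoffset));  */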
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          {
            int t = regs[i];
            regs[i] = regs[j];
            regs[j] = t;
          }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        {
          base_reg = regs[0];
          base_reg_rtx = newbase;
        }
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
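/* Illustrative peephole transformation performed with the help of this
   function:

	 ldr	r1, [r0, #4]
	 ldr	r2, [r0, #8]	=>	ldmib	r0, {r1, r2}

   With SORT_REGS, descending register pairs may first be reordered when
   the loaded values are only used commutatively.  */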
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
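/* Illustrative transformation (editorial example): the four-insn pattern

	 mov	r1, #0			mov	r1, #0
	 str	r1, [r0]	=>	mov	r2, #1
	 mov	r2, #1			stmia	r0, {r1, r2}
	 str	r2, [r0, #4]

   succeeds only when the constants can be placed in registers whose
   numbers ascend with the memory addresses.  */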
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = REGNO (regs[i]);

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
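/* For INTERLEAVE_FACTOR == 2 and an unaligned 8-byte copy, the function
   above emits, schematically (illustrative):

	 ldr	rA, [src]
	 ldr	rB, [src, #4]
	 str	rA, [dst]
	 str	rB, [dst, #4]

   so the latency of each load is hidden behind the neighbouring memory
   access.  */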
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx label, src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         harder.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
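/* E.g. (illustrative): at -O2 a 40-byte fully unaligned copy becomes a
   16-byte-per-iteration loop (two iterations) followed by a straight
   8-byte tail copy.  */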
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, 0));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode, and return the resulting memory
   reference.  */

static rtx
next_consecutive_mem (rtx mem)
{
  enum machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
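/* E.g. (illustrative): starting from a DImode mem at [r0], successive
   calls yield mems at [r0, #8], [r0, #16], and so on; this is how the
   LDRD/STRD copy loop below walks through the buffers.  */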
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  if (len)
    {
      dst = adjust_address (dst, QImode, 0);
      src = adjust_address (src, QImode, 0);
      reg0 = gen_reg_rtx (QImode);
      emit_move_insn (reg0, src);
      emit_move_insn (dst, reg0);
    }

  return true;
}
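/* Illustrative breakdown (editorial example): a 15-byte copy with both
   buffers word-aligned is emitted as one LDRD/STRD pair (8 bytes), one
   word copy (4 bytes), one halfword copy (2 bytes) and one final byte
   copy.  */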
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT: return CC_DLTmode;
        case LE: return CC_DLEmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT: return CC_DGTmode;
        case GE: return CC_DGEmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU: return CC_DLTUmode;
        case LEU: return CC_DLEUmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU: return CC_DGTUmode;
        case GEU: return CC_DGEUmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
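/* For example (illustrative): for "(a == b) || (a <= b)" we have
   cond1 = EQ and cond2 = LE; EQ dominates LE (whenever EQ holds, LE
   holds too), so the function returns CC_DLEmode and the pair can be
   tested by a single conditional-compare sequence.  */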
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_32BIT)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
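/* E.g. (illustrative): for a comparison of a register against a shifted
   register, canonicalization puts the shift in the first operand, so
   CC_SWPmode records that the operands must be swapped again when the
   assembler output such as "cmp r1, r0, lsl #2" is emitted.  */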
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  enum machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
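/* The little-endian sequence generated above is, schematically
   (illustrative):

	 ldrb	scratch, [base, #offset]
	 ldrb	out, [base, #offset + 1]
	 orr	out, scratch, out, lsl #8  */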
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            {
              rtx tmp = scratch;
              scratch = base_plus;
              base_plus = tmp;
            }
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                {
                  rtx tmp = scratch;
                  scratch = base_plus;
                  base_plus = tmp;
                }
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     outval.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (enum machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if (type)
        {
          if ((AGGREGATE_TYPE_P (type)
               || TREE_CODE (type) == COMPLEX_TYPE
               || FIXED_POINT_TYPE_P (type))
              && int_size_in_bytes (type) <= 4)
            return true;
        }
      else
        {
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
              && GET_MODE_SIZE (mode) <= 4)
            return true;
        }
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}

/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
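/* E.g. an offset of 1000 is accepted for Thumb-2 (a multiple of 4 and
   <= 1020) but rejected for ARM (> 255), while 254 is accepted for ARM
   but rejected for Thumb-2 because it is not a multiple of 4.  */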
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the range, and that the gap between
   the offsets is 4.  If reload is complete, then check that the registers are
   legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
                       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
          || (t == SP_REGNUM)
          || (t == PC_REGNUM)
          || (t2 == SP_REGNUM)
          || (t2 == PC_REGNUM)
          || (!load && (n == PC_REGNUM))
          || (load && (t == t2))
          /* Triggers Cortex-M3 LDRD errata.  */
          || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
          || (t2 == PC_REGNUM)
          || (t % 2 != 0)   /* First destination register is not even.  */
          || (t2 != t + 1)
          /* PC can be used as base register (for offset addressing only),
             but it is deprecated.  */
          || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)

/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      SWAP_RTX (operands[0], operands[1]);
      SWAP_RTX (operands[2], operands[3]);
      if (const_store)
	SWAP_RTX (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      SWAP_RTX (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  SWAP_RTX (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx	minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
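
/* E.g. a label aligned to 8 bytes in Thumb code, where the minimum
   instruction size is 2, may be preceded by up to 8 - 2 = 6 bytes of
   padding; in ARM code the same label needs at most 8 - 4 = 4.  */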
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
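
/* To make the arithmetic above concrete: assuming a fixup at address
   0x100 whose insn has a 4096-byte forward pool range, and 4 bytes of
   doubleword padding accounted in minipool_pad, the pool entry must
   be emitted before max_address = 0x100 + 4096 - 4 for the
   pc-relative load to reach it.  */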
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
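
/* For instance, three live entries of sizes 8, 4 and 4 receive the
   offsets 0, 8 and 12 respectively; an entry whose refcount has
   dropped to zero keeps the running offset unchanged.  */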
/* Output the literal table.  */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";; Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (mp->fix_size)
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
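
/* With the constants above, an ordinary insn or call costs 50, a jump
   costs 40, and any insn immediately followed by a label is 20
   cheaper still, so a forced pool preferentially lands just after an
   unconditional branch.  */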
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   it.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
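
/* E.g. for the DImode value 0x0000000100000001 both 32-bit halves are
   1, each loadable with a single MOV, so the reported cost is
   1 + 1 = 2, below the limit of 3 or 4 returned by
   arm_max_const_double_inline_cost.  */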
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx pat, op0, set = NULL;
      rtx prev, insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      enum {SKIP, CONV, SWAP_CONV} action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (!OBJECT_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm> */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* else fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = CONV;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case NOT:
		    case NEG:
		      /* MVNS <Rd>,<Rm> */
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS) */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8> */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = CONV;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (VOIDmode, dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix *  last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
  return "0";
}

/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_fstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
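
/* Illustrative: for a push of d8-d10 the template built here is
   "fstmfdd%?\t%m0!, {%P1, d9, d10}", which prints as something like
   "fstmfdd	sp!, {d8, d9, d10}" once the operand escapes are
   expanded.  */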
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
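
/* E.g. a request to push two pairs (d8, d9) on a pre-ARMv6 core is
   widened to three by the errata workaround above and so returns 24;
   on ARMv6 or later the same call stores exactly two pairs and
   returns 16.  */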
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
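
/* The sequence emitted for a call through r2 on an interworking
   target is therefore
	mov	lr, pc
	bx	r2
   the return address being set up by hand because pre-ARMv5 cores
   have no blx instruction.  */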
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to arm registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
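
/* For example, copying r1-r3 into r0-r2 (destination below source)
   iterates upwards so each source register is read before it is
   overwritten, while copying r0-r2 into r1-r3 iterates downwards for
   the same reason.  */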
/* Emit a movw/movt pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
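
/* For the constant 0x12345678 this emits a movw of 0x5678 followed by
   a movt of 0x1234 into the upper half; for a pure 16-bit value such
   as 0x1234 the high half is zero and the movt is skipped.  */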
17474 /* Output a move between double words. It must be REG<-MEM
17477 output_move_double (rtx
*operands
, bool emit
, int *count
)
17479 enum rtx_code code0
= GET_CODE (operands
[0]);
17480 enum rtx_code code1
= GET_CODE (operands
[1]);
17485 /* The only case when this might happen is when
17486 you are looking at the length of a DImode instruction
17487 that has an invalid constant in it. */
17488 if (code0
== REG
&& code1
!= MEM
)
17490 gcc_assert (!emit
);
17497 unsigned int reg0
= REGNO (operands
[0]);
17499 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17501 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
      switch (GET_CODE (XEXP (operands[1], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD
                  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
                output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
              else
                output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
          break;
        case PRE_MODIFY:
        case POST_MODIFY:
          /* Autoincrement addressing modes should never have overlapping
             base and destination registers, and overlapping index registers
             are already prohibited, so this doesn't need to worry about
             fix_cm3_ldrd.  */
          otherops[0] = operands[0];
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
            {
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
                {
                  /* Registers overlap so split out the increment.  */
                  if (emit)
                    {
                      output_asm_insn ("add%?\t%1, %1, %2", otherops);
                      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  /* Use a single insn if we can.
                     FIXME: IWMMXT allows offsets larger than ldrd can
                     handle, fix these up with a pair of ldr.  */
                  if (TARGET_THUMB2
                      || !CONST_INT_P (otherops[2])
                      || (INTVAL (otherops[2]) > -256
                          && INTVAL (otherops[2]) < 256))
                    {
                      if (emit)
                        output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
                    }
                  else
                    {
                      if (emit)
                        {
                          output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
                          output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                        }
                      if (count)
                        *count = 2;
                    }
                }
            }
          else
            {
              /* Use a single insn if we can.
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
                 fix these up with a pair of ldr.  */
              if (TARGET_THUMB2
                  || !CONST_INT_P (otherops[2])
                  || (INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256))
                {
                  if (emit)
                    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          break;
        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditional execution.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;
          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (REG_P (otherops[2])
                          || TARGET_THUMB2
                          || (CONST_INT_P (otherops[2])
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0],
                                                   otherops[2]))
                        {
                          rtx tmp;
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          tmp = otherops[1];
                          otherops[1] = otherops[2];
                          otherops[2] = tmp;
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (CONST_INT_P (otherops[2]))
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else if (emit)
                    output_asm_insn ("add%?\t%0, %1, %2", otherops);
                }
              else if (emit)
                output_asm_insn ("sub%?\t%0, %1, %2", otherops);

              if (count)
                *count = 2;

              if (TARGET_LDRD)
                return "ldr%(d%)\t%0, [%1]";

              return "ldm%(ia%)\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
                  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than ldrd can handle,
             fix these up with a pair of ldr.  */
          if (!TARGET_THUMB2
              && CONST_INT_P (otherops[2])
              && (INTVAL (otherops[2]) <= -256
                  || INTVAL (otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
                  return "";
                }
            }
          if (TARGET_LDRD
              && (REG_P (otherops[2])
                  || TARGET_THUMB2
                  || (CONST_INT_P (otherops[2])
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
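/* For illustration (hypothetical operands, not from the original sources):
   a DImode load from a plain register address, such as
   (set (reg:DI r0) (mem:DI (reg:SI r4))), emits the ldrd form
   "ldr%(d%)\t%0, [%m1]" (the odd register of the pair is implied) when
   TARGET_LDRD holds and there is no Cortex-M3 base/destination conflict,
   and otherwise falls back to

        ldmia   r4, {r0, r1}

   with the store half emitting the matching strd/stmia forms.  */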
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */
      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
              || mode == DFmode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "f%s%c%%?\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? 'd' : 's',
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
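/* For illustration (hypothetical operands, not from the original sources):
   for a DFmode load (dp != 0) from a simple address, the default template
   "f%s%c%%?\t%%%s0, %%1%s" expands through the sprintf above to
   "fldd%?\t%P0, %1", i.e. an assembly line such as

        fldd    d7, [r0, #8]

   while an SImode (integer-class) store would instead produce
   "fsts%?\t%0, %1\t%@ int", tagging the line as an integer move.  */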
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        {
          templ = "v%s1.64\t%%h1, %%A0";
          ops[0] = mem;
        }
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      {
        int i;
        int overlap = -1;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient
               size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        templ = "v%smia%%?\t%%m0, %%h1";
      else
        templ = "v%s1.64\t%%h1, %%A0";

      ops[0] = mem;
      ops[1] = reg;
      break;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx insn)
{
  rtx reg, mem, addr;
  int load;
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case EImode:
        case OImode:
          return 8;
        case CImode:
          return 12;
        case XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
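/* Worked example (hypothetical operands, not from the original sources):
   output_add_immediate with n = 0x10004 cannot use a single ARM rotated
   immediate, so the loop above splits it into 8-bit chunks at even bit
   positions and emits

        add     r0, r1, #4
        add     r0, r0, #65536

   using INSTR1 for the first chunk and INSTR2 for the rest.  */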
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
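/* Illustrative behaviour (hypothetical operands, not from the original
   sources): shift_op maps (mult x 8) to "lsl" with *amountp = 3 via
   int_log2, and maps (rotate x 10) to "ror" with *amountp = 22, since a
   left rotate by N is the same as a right rotate by 32 - N.  */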
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 - r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |=   (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5 && TARGET_ARM)
                {
                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
                }
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
                }
            }
          else
            {
              if (TARGET_UNIFIED_ASM)
                sprintf (instr, "pop%s\t{", conditional);
              else
                sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
            }

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          /* Use bx if it's available.  */
          if (arm_arch5 || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
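/* For illustration (hypothetical register usage, not from the original
   sources): a normal ARM function that saved {r4, r5, lr} and has no
   special exit requirements emits a single

        pop     {r4, r5, pc}

   loading the return address straight into the PC, whereas an
   interworked return instead restores LR and finishes with "bx lr".  */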
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
               crtl->args.size,
               crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0;
  unsigned regno;
  rtx tmp;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (VOIDmode, mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        unsigned regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* The first store also allocates the stack space for all
               saved registers, via writeback.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
            tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
            tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (VOIDmode,
                            gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (VOIDmode,
                            gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
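/* For illustration (hypothetical register set, not from the original
   sources): pushing the odd-sized set {r4, r5, r6} first stores r4 with
   writeback to keep the stack dword aligned, then pairs the rest:

        str     r4, [sp, #-12]!
        strd    r5, r6, [sp, #4]

   All three stores are described to the unwinder through the single
   REG_FRAME_RELATED_EXPR note built in DWARF above.  */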
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (VOIDmode,
                           gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (i != PC_REGNUM)
            {
              tmp = gen_rtx_SET (VOIDmode,
                                 gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
              dwarf_par_index++;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (i != PC_REGNUM)
            {
              tmp
                = gen_rtx_SET (VOIDmode,
                               gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc;
  int offset_adj;
  int emit_update;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in the register list, then we don't emit an SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    {
      tmp = ret_rtx;
      XVECEXP (par, 0, 0) = tmp;
    }

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (VOIDmode,
                         stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (VOIDmode,
                           reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
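/* For illustration (hypothetical register set, not from the original
   sources): with saved_regs_mask covering {r4, r5, pc} the PARALLEL built
   above is matched as a pop-with-return pattern, assembling to

        pop     {r4, r5, pc}

   while the REG_CFA_RESTORE notes tell the unwinder that r4 and r5
   (but deliberately not PC) have been restored.  */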
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
                     base_reg,
                     plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (VOIDmode,
                         reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                               base_reg, base_reg);
}
19893 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19894 number of registers are being popped, multiple LDRD patterns are created for
19895 all register pairs. If odd number of registers are popped, last register is
19896 loaded by using LDR pattern. */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (SImode,
                           reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the
               registers to be loaded are generated in above given LDRD
               pattern, and the pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first
     and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (SImode, reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}

/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It
   uses offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
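/* For instance (illustrative): for {r4, r5, r6, lr} this emits
   "ldrd r4, r5, [sp]", "ldr r6, [sp, #8]" and "ldr lr, [sp, #12]",
   followed by a single "add sp, sp, #16".  */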
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped,
               and we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (Pmode,
                         stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (SImode,
                         gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}

/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}

/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!leaf_function_p ()
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}

/* We do not know if r3 will be available because
   there may be an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}

/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}

/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

   For a given function some or all of these stack components
   may not be needed, giving rise to the possibility of
   eliminating some of the registers.

   The values returned by this function must reflect the behavior
   of arm_expand_prologue () and arm_compute_save_reg_mask ().

   The sign of the number returned reflects the direction of stack
   growth, so the values are positive for all eliminations except
   from the soft frame pointer to the hard frame pointer.

   SFP may point just inside the local variables block to ensure correct
   alignment.  */
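/* As an illustrative worked example (not from the original source,
   assuming no static chain and no caller-interworking slot): an ARM-mode
   function that saves {r4-r7, lr} (20 bytes), has 16 bytes of locals and
   no outgoing arguments would get saved_args = 0, saved_regs = 20,
   soft_frame = 24 (padded for doubleword alignment), locals_base = 40
   and outgoing_args = 40.  */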
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     To work around this, we cache the computed frame size.  This
     works because we will only be calling RTL expanders that need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT && TARGET_VFP)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            reg = 3;
          else
            for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
              {
                /* Avoid fixed registers; they may be changed at
                   arbitrary times so it's unsafe to restore them
                   during the epilogue.  */
                if (!fixed_regs[i]
                    && (offsets->saved_regs_mask & (1 << i)) == 0)
                  {
                    reg = i;
                    break;
                  }
              }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}

/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
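  /* Worked example (illustrative): with the offsets from the example
     above (saved_args = 0, soft_frame = 24, outgoing_args = 40),
     eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM would
     return 40 - (0 + 4) = 36.  */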
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}

/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}

/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                      stack_pointer_rtx,
                                      hard_frame_pointer_rtx));
      else
        insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                      hard_frame_pointer_rtx,
                                      stack_pointer_rtx));

      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}

/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
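/* For reference, a typical APCS-frame prologue (illustrative only; the
   exact sequence depends on options and on the registers involved) is:

        mov     ip, sp
        stmfd   sp!, {fp, ip, lr, pc}
        sub     fp, ip, #4
        sub     sp, sp, #<locals>  */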
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if the IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }
      else if (IS_NESTED (func_type))
        {
          /* The static chain register is the same as the IP register
             used as a scratch register during stack frame creation.
             To get around this need to find somewhere to store IP
             whilst the frame is being created.  We try the following
             places in order:

               1. The last argument register r3 if it is available.
               2. A slot on the stack above the frame if there are no
                  arguments to push onto the stack.
               3. Register r3 again, after pushing the argument registers
                  onto the stack, if this is a varargs function.
               4. The last slot on the stack created for the arguments to
                  push, if this isn't a varargs function.

             Note - we only need to tell the dwarf2 backend about the SP
             adjustment in the second variant; the static chain register
             doesn't need to be unwound, as it doesn't contain a value
             inherited from the caller.  */

          if (!arm_r3_live_at_start_p ())
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
          else if (args_to_push == 0)
            {
              rtx addr, dwarf;

              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
              saved_regs += 4;

              addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
              fp_offset = 4;

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -fp_offset));
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }
          else
            {
              /* Store the args on the stack.  */
              if (cfun->machine->uses_anonymous_args)
                {
                  insn
                    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf);
                  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
                  saved_pretend_args = 1;
                }
              else
                {
                  rtx addr, dwarf;

                  if (args_to_push == 4)
                    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
                  else
                    addr
                      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                            plus_constant (Pmode,
                                                           stack_pointer_rtx,
                                                           -args_to_push));

                  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

                  /* Just tell the dwarf backend that we adjusted SP.  */
                  dwarf
                    = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
                  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
                }

              RTX_FRAME_RELATED_P (insn) = 1;
              fp_offset = args_to_push;
              args_to_push = 0;
            }
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

          if (IS_NESTED (func_type))
            {
              /* Recover the static chain register.  */
              if (!arm_r3_live_at_start_p () || saved_pretend_args)
                insn = gen_rtx_REG (SImode, 3);
              else
                {
                  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
                  insn = gen_frame_mem (SImode, insn);
                }
              emit_set_insn (ip_rtx, insn);
              /* Add a USE to stop propagate_one_insn() from barfing.  */
              emit_insn (gen_force_register_use (ip_rtx));
            }
        }
      else
        {
          insn = GEN_INT (saved_regs - 4);
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                         hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}

/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}

/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
        {
          fputc('s', stream);
          arm_print_condition (stream);
        }
      else
        {
          arm_print_condition (stream);
          fputc('s', stream);
        }
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text
       consists of further digits which we don't want to be part of the
       operand number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        REAL_VALUE_FROM_CONST_DOUBLE (r, x);
        r = real_value_negate (&r);
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
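      /* For example (illustrative): with a DImode value held in r0/r1 on a
         little-endian target, %Q0 prints r0 (the least significant word)
         and %R0 prints r1, so "adds %Q0, %Q1, %Q2" followed by
         "adc %R0, %R1, %R2" forms a 64-bit addition.  */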
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          enum machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        int mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        int mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                   + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                   + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
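    /* E.g. (illustrative): a 16-byte vld1/vst1 access known to be 128-bit
       aligned is printed as "[r0:128]", and as "[r0:128]!" when the
       address is post-incremented.  */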
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element
       index.  */
    case 'y':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (CONST_DOUBLE_P (x));
      int result;
      result = vfp3_const_double_for_fract_bits (x);
      if (result == 0)
        result = vfp3_const_double_for_bits (x);
      fprintf (stream, "#%d", result);
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          if (TARGET_NEON)
            {
              char fpstr[20];
              real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                               sizeof (fpstr), 0, 1);
              fprintf (stream, "#%s", fpstr);
            }
          else
            fprintf (stream, "#%s", fp_immediate_constant (x));
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}

/* Target hook for printing a memory address.  */
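/* E.g. (illustrative): (plus (reg r0) (const_int 4)) prints as "[r0, #4]",
   (plus (reg r0) (mult (reg r1) (const_int 4))) prints as "[r0, r1, lsl #2]"
   and (post_inc (reg r0)) prints as "[r0], #<size>".  */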
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (!REG_P (base)
              || (REG_P (index) && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  */
              /* (one of them must be).  */
              /* Also ensure the SP is not used as in index register.  */
              rtx temp = base;
              base = index;
              index = temp;
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              asm_fprintf (stream, "[%r, %s%r",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (XEXP (index, 0)));
              arm_print_operand (stream, index, 'S');
              fputs ("]", stream);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          extern enum machine_mode output_memory_reference_mode;

          gcc_assert (REG_P (XEXP (x, 0)));

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
          else
            asm_fprintf (stream, "[%r], #%s%d",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (REG_P (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (x, 1)))
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}

/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}

/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          if (!arm_pic_data_is_text_relative
              || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            REAL_VALUE_TYPE rval;

            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

            assemble_real
              (rval, GET_MODE_INNER (mode),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}

static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}

/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes is:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
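/* For example (illustrative): the sequence

        cmp     r0, #0
        beq     .L1
        mov     r1, #1
   .L1:

   can be output as "cmp r0, #0" followed by the single conditional
   instruction "movne r1, #1", deleting the branch.  */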
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
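/* E.g. (illustrative): two insns predicated on EQ followed by one on NE
   can be emitted as one "itte eq" block: the two "...eq" insns, then the
   "...ne" insn.  */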
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
        return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: return ARM_NV;
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: return ARM_NV;
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: return ARM_NV;
        }

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: return ARM_NV;
        }

    case CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: return ARM_NV;
        }

    case CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    default: gcc_unreachable ();
    }
}

/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}

/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* Maximum number of conditionally executed instructions in a block
     is minimum of the two max values: maximum allowed in an IT block
     and maximum that is beneficial according to the cost model and tune.  */
  max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
    max_insns_skipped : MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
         barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
        break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
        break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
        break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
        arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
        break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
        break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
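
/* Illustrative sketch (not taken verbatim from any test case): for
   Thumb-2 input such as

       int f (int a, int b) { return a < b ? a + 1 : b + 2; }

   both arms can be folded into one IT block, schematically

       cmp     r0, r1
       ite     lt
       addlt   r0, r0, #1
       addge   r0, r1, #2

   The 't'/'e' pattern recorded in arm_condexec_mask/arm_condexec_masklen
   here is what thumb2_asm_output_opcode prints as the IT opcode.  */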
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
        {
          arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            {
              /* XXX Isn't this always a barrier?  */
              start_insn = next_nonnote_insn (start_insn);
            }
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
      else if (ANY_RETURN_P (body))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            start_insn = next_nonnote_insn (start_insn);
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            {
              reverse = TRUE;
              seeking_return = 1;
              return_code = GET_CODE (body);
            }
          else
            return;
        }
      else
        return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
        {
          if (!seeking_return)
            label = XEXP (SET_SRC (body), 0);
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
        {
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
          then_not_else = FALSE;
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 1));
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 2));
          then_not_else = FALSE;
        }
      else
        gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
         insns are okay, and the label or unconditional branch to the same
         label is not too far away, succeed.  */
      for (insns_skipped = 0;
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          rtx scanbody;

          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;

          switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
                 control falls in from somewhere else.  */
              if (this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case BARRIER:
              /* Succeed if the following insn is the target label.
                 Otherwise fail.
                 If return insns are used then the last insn in a function
                 will be a barrier.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case CALL_INSN:
              /* The AAPCS says that conditional calls should not be
                 used since they make interworking inefficient (the
                 linker can't transform BL<cond> into BLX).  That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }

              /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && BARRIER_P (this_insn))
                this_insn = next_nonnote_insn (this_insn);

              if (this_insn && this_insn == label
                  && insns_skipped < max_insns_skipped)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case JUMP_INSN:
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */

              scanbody = PATTERN (this_insn);
              if (GET_CODE (scanbody) == SET
                  && GET_CODE (SET_DEST (scanbody)) == PC)
                {
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
                    {
                      arm_ccfsm_state = 2;
                      succeed = TRUE;
                    }
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
                    fail = TRUE;
                }
              /* Fail if a conditional return is undesirable (e.g. on a
                 StrongARM), but still allow this if optimizing for size.  */
              else if (GET_CODE (scanbody) == return_code
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
              else if (GET_CODE (scanbody) == return_code)
                {
                  arm_ccfsm_state = 2;
                  succeed = TRUE;
                }
              else if (GET_CODE (scanbody) == PARALLEL)
                {
                  switch (get_attr_conds (this_insn))
                    {
                    case CONDS_NOCOND:
                      break;
                    default:
                      fail = TRUE;
                      break;
                    }
                }
              else
                fail = TRUE;  /* Unrecognized jump (e.g. epilogue).  */

              break;

            case INSN:
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
              if (!(GET_CODE (scanbody) == SET
                    || GET_CODE (scanbody) == PARALLEL)
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
                fail = TRUE;
              break;

            default:
              break;
            }
        }

      if (succeed)
        {
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
            arm_target_label = CODE_LABEL_NUMBER (label);
          else
            {
              gcc_assert (seeking_return || arm_ccfsm_state == 2);

              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
                {
                  this_insn = next_nonnote_insn (this_insn);
                  gcc_assert (!this_insn
                              || (!BARRIER_P (this_insn)
                                  && !LABEL_P (this_insn)));
                }
              if (!this_insn)
                {
                  /* Oh, dear! we ran off the end.. give up.  */
                  extract_constrain_insn_cached (insn);
                  arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }

          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

          if (reverse || then_not_else)
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
        }

      /* Restore recog_data (getting the attributes of other insns can
         destroy this array, but final.c assumes that it remains intact
         across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
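
/* Illustrative sketch (schematic, not from any test case): for ARM-state
   code of the shape

       cmp    r0, #0
       beq    .L1
       add    r1, r1, #1
   .L1:

   the state machine above suppresses the branch and conditionalizes the
   skipped instruction instead:

       cmp    r0, #0
       addne  r1, r1, #1

   ARM_CURRENT_CC then holds the (possibly inverted) condition under which
   the skipped instructions execute.  */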
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
                  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
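
/* For example (illustrative): with arm_condexec_mask == 0b101,
   arm_condexec_masklen == 3 and arm_current_cc == ARM_EQ, buff becomes
   "tet" ('t' where a mask bit is set, 'e' where it is clear; bit 0 is the
   first instruction and is always set), so the code above prints
   "itet eq": first insn eq, second ne, third eq.  */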
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT && TARGET_VFP
                && regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
         putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
        return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
               || (VALID_NEON_QREG_MODE (mode)
                   && NEON_REGNO_OK_FOR_QUAD (regno))
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
      && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
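
/* For example (illustrative): with TARGET_LDRD, a DImode value may live in
   {r0,r1} or {r2,r3} (even base register), but not in {r1,r2}, since
   ldrd/strd require an even/odd register pair; the (regno & 1) test above
   rejects the odd-based placement.  */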
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
          || VALID_NEON_QREG_MODE (mode1)
          || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
          || VALID_NEON_QREG_MODE (mode2)
          || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
        return STACK_REG;
      if (regno == CC_REGNUM)
        return CC_REG;
      if (regno < 8)
        return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
        return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
        return VFP_LO_REGS;
      else
        return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) == SET
          && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
          && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
          && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
          && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
             )
        {
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
          break;
        }
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
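
/* Illustratively, the loop above matches an insn whose pattern has the
   shape

       (set (reg Rn)
            (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int 8)))

   and reports 8 as the argument's offset from the frame pointer.  */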
  T_MAX          /* Size of enum.  Keep last.  */
} neon_builtin_type_mode;

#define TYPE_MODE_BIT(X) (1 << (X))

#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)  \
                 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
                 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
                 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
                 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v4hf_UP  T_V4HF
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define UP(X) X##_UP

  NEON_LOADSTRUCTLANE,
  NEON_STORESTRUCTLANE
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const neon_builtin_type_mode mode;
  const enum insn_code code;
  unsigned int fcode;
} neon_builtin_datum;

#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  {#N, NEON_##T, UP (A), CF (N, A), 0}
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  {#N, NEON_##T, UP (J), CF (N, J), 0}
/* The NEON builtin data can be found in arm_neon_builtins.def.
   The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.  */

static neon_builtin_datum neon_builtin_data[] =
{
#include "arm_neon_builtins.def"
};
#define CF(N,X) ARM_BUILTIN_NEON_##N##X
#define VAR1(T, N, A) \
  CF (N, A)
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  CF (N, B)
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  CF (N, C)
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  CF (N, D)
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  CF (N, E)
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  CF (N, F)
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  CF (N, G)
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  CF (N, H)
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  CF (N, I)
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  CF (N, J)
enum arm_builtins
{
  ARM_BUILTIN_GETWCGR0,
  ARM_BUILTIN_GETWCGR1,
  ARM_BUILTIN_GETWCGR2,
  ARM_BUILTIN_GETWCGR3,

  ARM_BUILTIN_SETWCGR0,
  ARM_BUILTIN_SETWCGR1,
  ARM_BUILTIN_SETWCGR2,
  ARM_BUILTIN_SETWCGR3,

  ARM_BUILTIN_WAVG2BR,
  ARM_BUILTIN_WAVG2HR,
  ARM_BUILTIN_WAVG2B,
  ARM_BUILTIN_WAVG2H,

  ARM_BUILTIN_WMACSZ,
  ARM_BUILTIN_WMACUZ,

  ARM_BUILTIN_WSADBZ,
  ARM_BUILTIN_WSADHZ,

  ARM_BUILTIN_WALIGNI,
  ARM_BUILTIN_WALIGNR0,
  ARM_BUILTIN_WALIGNR1,
  ARM_BUILTIN_WALIGNR2,
  ARM_BUILTIN_WALIGNR3,

  ARM_BUILTIN_TMIAPH,
  ARM_BUILTIN_TMIABB,
  ARM_BUILTIN_TMIABT,
  ARM_BUILTIN_TMIATB,
  ARM_BUILTIN_TMIATT,

  ARM_BUILTIN_TMOVMSKB,
  ARM_BUILTIN_TMOVMSKH,
  ARM_BUILTIN_TMOVMSKW,

  ARM_BUILTIN_TBCSTB,
  ARM_BUILTIN_TBCSTH,
  ARM_BUILTIN_TBCSTW,

  ARM_BUILTIN_WMADDS,
  ARM_BUILTIN_WMADDU,

  ARM_BUILTIN_WPACKHSS,
  ARM_BUILTIN_WPACKWSS,
  ARM_BUILTIN_WPACKDSS,
  ARM_BUILTIN_WPACKHUS,
  ARM_BUILTIN_WPACKWUS,
  ARM_BUILTIN_WPACKDUS,

  ARM_BUILTIN_WADDSSB,
  ARM_BUILTIN_WADDSSH,
  ARM_BUILTIN_WADDSSW,
  ARM_BUILTIN_WADDUSB,
  ARM_BUILTIN_WADDUSH,
  ARM_BUILTIN_WADDUSW,

  ARM_BUILTIN_WSUBSSB,
  ARM_BUILTIN_WSUBSSH,
  ARM_BUILTIN_WSUBSSW,
  ARM_BUILTIN_WSUBUSB,
  ARM_BUILTIN_WSUBUSH,
  ARM_BUILTIN_WSUBUSW,

  ARM_BUILTIN_WCMPEQB,
  ARM_BUILTIN_WCMPEQH,
  ARM_BUILTIN_WCMPEQW,
  ARM_BUILTIN_WCMPGTUB,
  ARM_BUILTIN_WCMPGTUH,
  ARM_BUILTIN_WCMPGTUW,
  ARM_BUILTIN_WCMPGTSB,
  ARM_BUILTIN_WCMPGTSH,
  ARM_BUILTIN_WCMPGTSW,

  ARM_BUILTIN_TEXTRMSB,
  ARM_BUILTIN_TEXTRMSH,
  ARM_BUILTIN_TEXTRMSW,
  ARM_BUILTIN_TEXTRMUB,
  ARM_BUILTIN_TEXTRMUH,
  ARM_BUILTIN_TEXTRMUW,
  ARM_BUILTIN_TINSRB,
  ARM_BUILTIN_TINSRH,
  ARM_BUILTIN_TINSRW,

  ARM_BUILTIN_WMAXSW,
  ARM_BUILTIN_WMAXSH,
  ARM_BUILTIN_WMAXSB,
  ARM_BUILTIN_WMAXUW,
  ARM_BUILTIN_WMAXUH,
  ARM_BUILTIN_WMAXUB,
  ARM_BUILTIN_WMINSW,
  ARM_BUILTIN_WMINSH,
  ARM_BUILTIN_WMINSB,
  ARM_BUILTIN_WMINUW,
  ARM_BUILTIN_WMINUH,
  ARM_BUILTIN_WMINUB,

  ARM_BUILTIN_WMULUM,
  ARM_BUILTIN_WMULSM,
  ARM_BUILTIN_WMULUL,

  ARM_BUILTIN_PSADBH,
  ARM_BUILTIN_WSHUFH,

  ARM_BUILTIN_WSLLHI,
  ARM_BUILTIN_WSLLWI,
  ARM_BUILTIN_WSLLDI,
  ARM_BUILTIN_WSRAHI,
  ARM_BUILTIN_WSRAWI,
  ARM_BUILTIN_WSRADI,
  ARM_BUILTIN_WSRLHI,
  ARM_BUILTIN_WSRLWI,
  ARM_BUILTIN_WSRLDI,
  ARM_BUILTIN_WRORHI,
  ARM_BUILTIN_WRORWI,
  ARM_BUILTIN_WRORDI,

  ARM_BUILTIN_WUNPCKIHB,
  ARM_BUILTIN_WUNPCKIHH,
  ARM_BUILTIN_WUNPCKIHW,
  ARM_BUILTIN_WUNPCKILB,
  ARM_BUILTIN_WUNPCKILH,
  ARM_BUILTIN_WUNPCKILW,

  ARM_BUILTIN_WUNPCKEHSB,
  ARM_BUILTIN_WUNPCKEHSH,
  ARM_BUILTIN_WUNPCKEHSW,
  ARM_BUILTIN_WUNPCKEHUB,
  ARM_BUILTIN_WUNPCKEHUH,
  ARM_BUILTIN_WUNPCKEHUW,
  ARM_BUILTIN_WUNPCKELSB,
  ARM_BUILTIN_WUNPCKELSH,
  ARM_BUILTIN_WUNPCKELSW,
  ARM_BUILTIN_WUNPCKELUB,
  ARM_BUILTIN_WUNPCKELUH,
  ARM_BUILTIN_WUNPCKELUW,

  ARM_BUILTIN_WADDSUBHX,
  ARM_BUILTIN_WSUBADDHX,

  ARM_BUILTIN_WABSDIFFB,
  ARM_BUILTIN_WABSDIFFH,
  ARM_BUILTIN_WABSDIFFW,

  ARM_BUILTIN_WADDCH,
  ARM_BUILTIN_WADDCW,

  ARM_BUILTIN_WAVG4R,

  ARM_BUILTIN_WMADDSX,
  ARM_BUILTIN_WMADDUX,

  ARM_BUILTIN_WMADDSN,
  ARM_BUILTIN_WMADDUN,

  ARM_BUILTIN_WMULWSM,
  ARM_BUILTIN_WMULWUM,

  ARM_BUILTIN_WMULWSMR,
  ARM_BUILTIN_WMULWUMR,

  ARM_BUILTIN_WMULWL,

  ARM_BUILTIN_WMULSMR,
  ARM_BUILTIN_WMULUMR,

  ARM_BUILTIN_WQMULM,
  ARM_BUILTIN_WQMULMR,

  ARM_BUILTIN_WQMULWM,
  ARM_BUILTIN_WQMULWMR,

  ARM_BUILTIN_WADDBHUSM,
  ARM_BUILTIN_WADDBHUSL,

  ARM_BUILTIN_WQMIABB,
  ARM_BUILTIN_WQMIABT,
  ARM_BUILTIN_WQMIATB,
  ARM_BUILTIN_WQMIATT,

  ARM_BUILTIN_WQMIABBN,
  ARM_BUILTIN_WQMIABTN,
  ARM_BUILTIN_WQMIATBN,
  ARM_BUILTIN_WQMIATTN,

  ARM_BUILTIN_WMIABB,
  ARM_BUILTIN_WMIABT,
  ARM_BUILTIN_WMIATB,
  ARM_BUILTIN_WMIATT,

  ARM_BUILTIN_WMIABBN,
  ARM_BUILTIN_WMIABTN,
  ARM_BUILTIN_WMIATBN,
  ARM_BUILTIN_WMIATTN,

  ARM_BUILTIN_WMIAWBB,
  ARM_BUILTIN_WMIAWBT,
  ARM_BUILTIN_WMIAWTB,
  ARM_BUILTIN_WMIAWTT,

  ARM_BUILTIN_WMIAWBBN,
  ARM_BUILTIN_WMIAWBTN,
  ARM_BUILTIN_WMIAWTBN,
  ARM_BUILTIN_WMIAWTTN,

  ARM_BUILTIN_WMERGE,

  ARM_BUILTIN_CRC32B,
  ARM_BUILTIN_CRC32H,
  ARM_BUILTIN_CRC32W,
  ARM_BUILTIN_CRC32CB,
  ARM_BUILTIN_CRC32CH,
  ARM_BUILTIN_CRC32CW,

#define CRYPTO1(L, U, M1, M2) \
  ARM_BUILTIN_CRYPTO_##U,
#define CRYPTO2(L, U, M1, M2, M3) \
  ARM_BUILTIN_CRYPTO_##U,
#define CRYPTO3(L, U, M1, M2, M3, M4) \
  ARM_BUILTIN_CRYPTO_##U,

#include "crypto.def"

#include "arm_neon_builtins.def"

  ARM_BUILTIN_MAX
};

#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))

static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
#define NUM_DREG_TYPES 5
#define NUM_QREG_TYPES 6

static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode;
  tree decl;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_floatHF_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_intUTI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V4HF_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
  tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
  tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);
  neon_floatHF_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
  layout_type (neon_floatHF_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
                                             "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
                                             "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
                                             "__builtin_neon_hf");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
                                             "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
                                             "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
                                             "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
                                             "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
                                             "__builtin_neon_poly16");
  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
                                           TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
                                           TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
                                           TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
                                           TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
                                           TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V4HF_type_node =
    build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
  neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
                                             "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
                                             "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
                                             "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
                                             "__builtin_neon_udi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
                                             "__builtin_neon_poly64");
  (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
                                             "__builtin_neon_poly128");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
                                             "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
                                             "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
                                             "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
                                             "__builtin_neon_xi");
  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
                              V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
                              V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
                              V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
                              V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
                              neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
                              V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
                              V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
                              V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
                              V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
                              V2DI_type_node, NULL);
  if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
    {
      tree V4USI_type_node =
        build_vector_type_for_mode (intUSI_type_node, V4SImode);

      tree V16UQI_type_node =
        build_vector_type_for_mode (intUQI_type_node, V16QImode);

      tree v16uqi_ftype_v16uqi
        = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);

      tree v16uqi_ftype_v16uqi_v16uqi
        = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
                                    V16UQI_type_node, NULL_TREE);

      tree v4usi_ftype_v4usi
        = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);

      tree v4usi_ftype_v4usi_v4usi
        = build_function_type_list (V4USI_type_node, V4USI_type_node,
                                    V4USI_type_node, NULL_TREE);

      tree v4usi_ftype_v4usi_v4usi_v4usi
        = build_function_type_list (V4USI_type_node, V4USI_type_node,
                                    V4USI_type_node, V4USI_type_node, NULL_TREE);

      tree uti_ftype_udi_udi
        = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
                                    intUDI_type_node, NULL_TREE);

      #define C(U) \
        ARM_BUILTIN_CRYPTO_##U
      #define N(L) \
        "__builtin_arm_crypto_"#L
      #define FT1(R, A) \
        R##_ftype_##A
      #define FT2(R, A1, A2) \
        R##_ftype_##A1##_##A2
      #define FT3(R, A1, A2, A3) \
        R##_ftype_##A1##_##A2##_##A3
      #define CRYPTO1(L, U, R, A) \
        arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
                                                         C (U), BUILT_IN_MD, \
                                                         NULL, NULL_TREE);
      #define CRYPTO2(L, U, R, A1, A2) \
        arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
                                                         C (U), BUILT_IN_MD, \
                                                         NULL, NULL_TREE);
      #define CRYPTO3(L, U, R, A1, A2, A3) \
        arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
                                                         C (U), BUILT_IN_MD, \
                                                         NULL, NULL_TREE);
      #include "crypto.def"

      #undef CRYPTO1
      #undef CRYPTO2
      #undef CRYPTO3
      #undef C
      #undef N
      #undef FT1
      #undef FT2
      #undef FT3
    }
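
  /* For example (illustrative; assuming crypto.def contains an entry of
     the shape CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)), the macros
     above would register a builtin named "__builtin_arm_crypto_aesd" with
     type v16uqi_ftype_v16uqi_v16uqi under ARM_BUILTIN_CRYPTO_AESD.  */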
  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;
  qreg_types[5] = neon_intUTI_type_node;

  for (i = 0; i < NUM_QREG_TYPES; i++)
    {
      int j;
      for (j = 0; j < NUM_QREG_TYPES; j++)
        {
          if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
            reinterp_ftype_dreg[i][j]
              = build_function_type_list (dreg_types[i], dreg_types[j], NULL);

          reinterp_ftype_qreg[i][j]
            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
        }
    }
  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
       i < ARRAY_SIZE (neon_builtin_data);
       i++, fcode++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];

      const char* const modenames[] = {
        "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
        "v16qi", "v8hi", "v4si", "v4sf", "v2di",
        "ti", "ei", "oi"
      };
      char namebuf[60];
      tree ftype = NULL;
      int is_load = 0, is_store = 0;

      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);

      d->fcode = fcode;

      switch (d->itype)
        {
        case NEON_LOAD1:
        case NEON_LOAD1LANE:
        case NEON_LOADSTRUCT:
        case NEON_LOADSTRUCTLANE:
          is_load = 1;
          /* Fall through.  */
        case NEON_STORE1:
        case NEON_STORE1LANE:
        case NEON_STORESTRUCT:
        case NEON_STORESTRUCTLANE:
          if (!is_load)
            is_store = 1;
          /* Fall through.  */
        case NEON_UNOP:
        case NEON_BINOP:
        case NEON_LOGICBINOP:
        case NEON_SHIFTINSERT:
        case NEON_TERNOP:
        case NEON_GETLANE:
        case NEON_SETLANE:
        case NEON_CREATE:
        case NEON_DUP:
        case NEON_DUPLANE:
        case NEON_SHIFTIMM:
        case NEON_SHIFTACC:
        case NEON_COMBINE:
        case NEON_SPLIT:
        case NEON_CONVERT:
        case NEON_FIXCONV:
        case NEON_LANEMUL:
        case NEON_LANEMULL:
        case NEON_LANEMULH:
        case NEON_LANEMAC:
        case NEON_SCALARMUL:
        case NEON_SCALARMULL:
        case NEON_SCALARMULH:
        case NEON_SCALARMAC:
        case NEON_SELECT:
        case NEON_VTBL:
        case NEON_VTBX:
          {
            int k;
            tree return_type = void_type_node, args = void_list_node;

            /* Build a function type directly from the insn_data for
               this builtin.  The build_function_type() function takes
               care of removing duplicates for us.  */
            for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
              {
                tree eltype;

                if (is_load && k == 1)
                  {
                    /* Neon load patterns always have the memory
                       operand in the operand 1 position.  */
                    gcc_assert (insn_data[d->code].operand[k].predicate
                                == neon_struct_operand);

                    switch (d->mode)
                      {
                      case T_V8QI:
                      case T_V16QI:
                        eltype = const_intQI_pointer_node;
                        break;

                      case T_V4HI:
                      case T_V8HI:
                        eltype = const_intHI_pointer_node;
                        break;

                      case T_V2SI:
                      case T_V4SI:
                        eltype = const_intSI_pointer_node;
                        break;

                      case T_V2SF:
                      case T_V4SF:
                        eltype = const_float_pointer_node;
                        break;

                      case T_DI:
                      case T_V2DI:
                        eltype = const_intDI_pointer_node;
                        break;

                      default: gcc_unreachable ();
                      }
                  }
                else if (is_store && k == 0)
                  {
                    /* Similarly, Neon store patterns use operand 0 as
                       the memory location to store to.  */
                    gcc_assert (insn_data[d->code].operand[k].predicate
                                == neon_struct_operand);

                    switch (d->mode)
                      {
                      case T_V8QI:
                      case T_V16QI:
                        eltype = intQI_pointer_node;
                        break;

                      case T_V4HI:
                      case T_V8HI:
                        eltype = intHI_pointer_node;
                        break;

                      case T_V2SI:
                      case T_V4SI:
                        eltype = intSI_pointer_node;
                        break;

                      case T_V2SF:
                      case T_V4SF:
                        eltype = float_pointer_node;
                        break;

                      case T_DI:
                      case T_V2DI:
                        eltype = intDI_pointer_node;
                        break;

                      default: gcc_unreachable ();
                      }
                  }
                else
                  {
                    switch (insn_data[d->code].operand[k].mode)
                      {
                      case VOIDmode: eltype = void_type_node; break;
                      /* Scalars.  */
                      case QImode: eltype = neon_intQI_type_node; break;
                      case HImode: eltype = neon_intHI_type_node; break;
                      case SImode: eltype = neon_intSI_type_node; break;
                      case SFmode: eltype = neon_float_type_node; break;
                      case DImode: eltype = neon_intDI_type_node; break;
                      case TImode: eltype = intTI_type_node; break;
                      case EImode: eltype = intEI_type_node; break;
                      case OImode: eltype = intOI_type_node; break;
                      case CImode: eltype = intCI_type_node; break;
                      case XImode: eltype = intXI_type_node; break;
                      /* 64-bit vectors.  */
                      case V8QImode: eltype = V8QI_type_node; break;
                      case V4HImode: eltype = V4HI_type_node; break;
                      case V2SImode: eltype = V2SI_type_node; break;
                      case V2SFmode: eltype = V2SF_type_node; break;
                      /* 128-bit vectors.  */
                      case V16QImode: eltype = V16QI_type_node; break;
                      case V8HImode: eltype = V8HI_type_node; break;
                      case V4SImode: eltype = V4SI_type_node; break;
                      case V4SFmode: eltype = V4SF_type_node; break;
                      case V2DImode: eltype = V2DI_type_node; break;
                      default: gcc_unreachable ();
                      }
                  }

                if (k == 0 && !is_store)
                  return_type = eltype;
                else
                  args = tree_cons (NULL_TREE, eltype, args);
              }

            ftype = build_function_type (return_type, args);
          }
          break;

        case NEON_RESULTPAIR:
          {
            switch (insn_data[d->code].operand[1].mode)
              {
              case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
              case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
              case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
              case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
              case DImode: ftype = void_ftype_pdi_di_di; break;
              case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
              case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
              case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
              case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
              case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
              default: gcc_unreachable ();
              }
          }
          break;

        case NEON_REINTERP:
          {
            /* We iterate over NUM_DREG_TYPES doubleword types,
               then NUM_QREG_TYPES quadword types.
               V4HF is not a type used in reinterpret, so we translate
               d->mode to the correct index in reinterp_ftype_dreg.  */
            bool qreg_p
              = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
            int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
                      % NUM_QREG_TYPES;
            switch (insn_data[d->code].operand[0].mode)
              {
              case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
              case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
              case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
              case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
              case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
              case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
              case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
              case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
              case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
              case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
              case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
              default: gcc_unreachable ();
              }
          }
          break;

        case NEON_FLOAT_WIDEN:
          {
            tree eltype = NULL_TREE;
            tree return_type = NULL_TREE;

            switch (insn_data[d->code].operand[1].mode)
              {
              case V4HFmode:
                eltype = V4HF_type_node;
                return_type = V4SF_type_node;
                break;
              default: gcc_unreachable ();
              }

            ftype = build_function_type_list (return_type, eltype, NULL);
          }
          break;

        case NEON_FLOAT_NARROW:
          {
            tree eltype = NULL_TREE;
            tree return_type = NULL_TREE;

            switch (insn_data[d->code].operand[1].mode)
              {
              case V4SFmode:
                eltype = V4SF_type_node;
                return_type = V4HF_type_node;
                break;
              default: gcc_unreachable ();
              }

            ftype = build_function_type_list (return_type, eltype, NULL);
          }
          break;

        default:
          gcc_unreachable ();
        }

      gcc_assert (ftype != NULL);

      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);

      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
                                   NULL_TREE);
      arm_builtin_decls[fcode] = decl;
    }
}
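
/* Illustratively, a BINOP table entry named "vadd" with key mode T_V8QI
   ends up registered as "__builtin_neon_vaddv8qi" by the sprintf above;
   the intrinsics in arm_neon.h are thin wrappers around builtins named
   this way.  */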
#undef NUM_DREG_TYPES
#undef NUM_QREG_TYPES

#define def_mbuiltin(MASK, NAME, TYPE, CODE)                            \
  do                                                                    \
    {                                                                   \
      if ((MASK) & insn_flags)                                          \
        {                                                               \
          tree bdecl;                                                   \
          bdecl = add_builtin_function ((NAME), (TYPE), (CODE),         \
                                        BUILT_IN_MD, NULL, NULL_TREE);  \
          arm_builtin_decls[CODE] = bdecl;                              \
        }                                                               \
    }                                                                   \
  while (0)

struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
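
/* Each bdesc_* entry below feeds def_mbuiltin.  For example (illustrative),
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) expands to

       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
         ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   and __builtin_arm_waddb is then registered only when the iWMMXt feature
   bit is set in insn_flags.  */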
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN(code, string, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
  IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
  IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
  IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
  IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
  IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
  IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
  IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
  IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN2(code, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)

#define CRC32_BUILTIN(L, U) \
  {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
   UNKNOWN, 0},
   CRC32_BUILTIN (crc32b, CRC32B)
   CRC32_BUILTIN (crc32h, CRC32H)
   CRC32_BUILTIN (crc32w, CRC32W)
   CRC32_BUILTIN (crc32cb, CRC32CB)
   CRC32_BUILTIN (crc32ch, CRC32CH)
   CRC32_BUILTIN (crc32cw, CRC32CW)
#undef CRC32_BUILTIN
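
/* For reference (illustrative usage): __builtin_arm_crc32b takes the
   running CRC as an unsigned int plus one data byte, and is what ACLE's
   __crc32b wrapper maps to on targets with the CRC32 extension, roughly

       uint32_t c = __builtin_arm_crc32b (c0, byte);
*/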
#define CRYPTO_BUILTIN(L, U) \
  {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
   UNKNOWN, 0},
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
#define CRYPTO1(L, U, R, A)
#define CRYPTO3(L, U, R, A1, A2, A3)
#include "crypto.def"
};
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
  IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
  IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
  IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
  IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
  IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
  IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)

#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
#define CRYPTO2(L, U, R, A1, A2)
#define CRYPTO3(L, U, R, A1, A2, A3)
#include "crypto.def"
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};

static const struct builtin_description bdesc_3arg[] =
{
#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
#define CRYPTO1(L, U, R, A)
#define CRYPTO2(L, U, R, A1, A2)
#include "crypto.def"
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};
#undef CRYPTO_BUILTIN
/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node,
                                integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_di_di
    = build_function_type_list (V2SI_type_node,
                                long_long_integer_type_node,
                                long_long_integer_type_node,
                                NULL_TREE);
  tree di_ftype_di_int
    = build_function_type_list (long_long_integer_type_node,
                                long_long_integer_type_node,
                                integer_type_node, NULL_TREE);
  tree di_ftype_di_int_int
    = build_function_type_list (long_long_integer_type_node,
                                long_long_integer_type_node,
                                integer_type_node,
                                integer_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node,
                                V8QI_type_node, NULL_TREE);
  tree int_ftype_v4hi
    = build_function_type_list (integer_type_node,
                                V4HI_type_node, NULL_TREE);
  tree int_ftype_v2si
    = build_function_type_list (integer_type_node,
                                V2SI_type_node, NULL_TREE);
  tree int_ftype_v8qi_int
    = build_function_type_list (integer_type_node,
                                V8QI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v2si_int
    = build_function_type_list (integer_type_node,
                                V2SI_type_node, integer_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_int_int
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int_int
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v8qi_ftype_v4hi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V8QI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree di_ftype_void
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
  tree int_ftype_void
    = build_function_type_list (integer_type_node, NULL_TREE);
  tree di_ftype_v8qi
    = build_function_type_list (long_long_integer_type_node,
                                V8QI_type_node, NULL_TREE);
  tree di_ftype_v4hi
    = build_function_type_list (long_long_integer_type_node,
                                V4HI_type_node, NULL_TREE);
  tree di_ftype_v2si
    = build_function_type_list (long_long_integer_type_node,
                                V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v8qi
    = build_function_type_list (V4HI_type_node,
                                V8QI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, NULL_TREE);

  tree di_ftype_di_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                V4HI_type_node, V4HI_type_node,
                                NULL_TREE);

  tree di_ftype_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
                                V4HI_type_node, V4HI_type_node,
                                NULL_TREE);

  tree v2si_ftype_v2si_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V4HI_type_node,
                                V4HI_type_node, NULL_TREE);

  tree v2si_ftype_v2si_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V8QI_type_node,
                                V8QI_type_node, NULL_TREE);

  tree di_ftype_di_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node,
                                NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                integer_type_node, NULL_TREE);

  tree void_ftype_int
    = build_function_type_list (void_type_node,
                                integer_type_node, NULL_TREE);

  tree v8qi_ftype_char
    = build_function_type_list (V8QI_type_node,
                                signed_char_type_node, NULL_TREE);

  tree v4hi_ftype_short
    = build_function_type_list (V4HI_type_node,
                                short_integer_type_node, NULL_TREE);

  tree v2si_ftype_int
    = build_function_type_list (V2SI_type_node,
                                integer_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                NULL_TREE);
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
#define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

#define iwmmx2_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
  iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
  iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
  iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
  iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
  iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
  iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
  iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
  iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);

  iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
  iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
  iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
  iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
  iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
  iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);

  iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
  iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
  iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
  iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
  iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
  iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);

  iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
  iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
  iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
  iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
  iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
  iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);

  iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
  iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
  iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
  iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
  iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
  iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);

  iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);

  iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
  iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
  iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
  iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
  iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
  iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
  iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
  iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
  iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
  iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);

  iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
  iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
  iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
  iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
  iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
  iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
  iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
  iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
  iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);

  iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
  iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
  iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);

  iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
  iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
  iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);

  iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
  iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);

  iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
  iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
  iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
  iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
  iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
  iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);

  iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
  iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
  iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
  iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
  iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
  iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
  iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
  iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
  iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
  iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
  iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
  iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);

  iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
  iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
  iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
  iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);

  iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
  iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
  iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
  iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
  iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
  iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
  iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);

  iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
  iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
  iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);

  iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
  iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
  iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
  iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);

  iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
  iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
  iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
  iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);

  iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
  iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
  iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
  iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);

  iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
  iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
  iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
  iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);

  iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
  iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
  iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
  iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);

  iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
  iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
  iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
  iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);

  iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);

  iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
  iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
  iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);

#undef iwmmx_mbuiltin
#undef iwmmx2_mbuiltin
}
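/* For example, with the registrations above in effect, user code built
   for an iWMMXt target can call these builtins directly; the types
   follow the *_ftype_* lists constructed earlier in this function:

     long long acc = __builtin_arm_wzero ();     (signature di_ftype_void)
     acc = __builtin_arm_wslldi (acc, 8);        (signature di_ftype_di_int)

   The supported user interface is the wrapper set in mmintrin.h; the
   direct calls are shown only to illustrate the name/type mapping.  */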
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
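/* Once registered, "__fp16" behaves as a scalar floating type at the
   language level, so user code such as

     __fp16 h = 1.0f;

   is accepted; this only happens when an fp16 format has been selected
   (-mfp16-format=ieee or =alternative), which is what sets
   arm_fp16_format and triggers the registration above.  */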
static void
arm_init_crc32_builtins ()
{
  tree si_ftype_si_qi
    = build_function_type_list (unsigned_intSI_type_node,
                                unsigned_intSI_type_node,
                                unsigned_intQI_type_node, NULL_TREE);
  tree si_ftype_si_hi
    = build_function_type_list (unsigned_intSI_type_node,
                                unsigned_intSI_type_node,
                                unsigned_intHI_type_node, NULL_TREE);
  tree si_ftype_si_si
    = build_function_type_list (unsigned_intSI_type_node,
                                unsigned_intSI_type_node,
                                unsigned_intSI_type_node, NULL_TREE);

  arm_builtin_decls[ARM_BUILTIN_CRC32B]
    = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
                            ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32H]
    = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
                            ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32W]
    = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
                            ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32CB]
    = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
                            ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32CH]
    = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
                            ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32CW]
    = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
                            ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
}
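/* Illustrative use of the builtins registered above (the ACLE __crc32b,
   __crc32h, ... intrinsics are thin wrappers around them); the
   si_ftype_si_qi signature corresponds to uint32_t (uint32_t, uint8_t):

     unsigned int crc = 0xffffffffu;
     crc = __builtin_arm_crc32b (crc, (unsigned char) 0x5a);
*/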
static void
arm_init_builtins (void)
{
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();

  if (TARGET_CRC32)
    arm_init_crc32_builtins ();
}
/* Return the ARM builtin for CODE.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
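/* The practical effect of this promotion is that arithmetic on __fp16
   operands is carried out in float, so no HFmode arithmetic patterns
   are needed.  For example:

     __fp16 a, b;
     float sum = a + b;

   Both operands are promoted to float before the addition.  */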
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
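/* For instance, given __fp16 h, the conversion

     double d = h;

   is expanded as (double)(float)h: the half-to-float step is the one
   the hardware conversion instructions (or the fp16 library helpers)
   actually provide.  */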
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
                               : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Function to expand ternary builtins.  */

static rtx
arm_expand_ternop_builtin (enum insn_code icode,
                           tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);

  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = NULL_RTX;

  /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
     lane operand depending on endianness.  */
  bool builtin_sha1cpm_p = false;

  if (insn_data[icode].n_operands == 5)
    {
      gcc_assert (icode == CODE_FOR_crypto_sha1c
                  || icode == CODE_FOR_crypto_sha1p
                  || icode == CODE_FOR_crypto_sha1m);
      builtin_sha1cpm_p = true;
    }
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
  enum machine_mode mode2 = insn_data[icode].operand[3].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);
  if (VECTOR_MODE_P (mode2))
    op2 = safe_vector_operand (op2, mode2);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
              && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);
  if (builtin_sha1cpm_p)
    op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);

  if (builtin_sha1cpm_p)
    pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
  else
    pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
                         tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  rtx op1 = NULL_RTX;
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  bool builtin_sha1h_p = false;

  if (insn_data[icode].n_operands == 3)
    {
      gcc_assert (icode == CODE_FOR_crypto_sha1h);
      builtin_sha1h_p = true;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }
  if (builtin_sha1h_p)
    op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);

  if (builtin_sha1h_p)
    pat = GEN_FCN (icode) (target, op0, op1);
  else
    pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
   and return an expression for the accessed memory.

   The intrinsic function operates on a block of registers that has
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.  The
   function references the memory at EXP of type TYPE and in mode
   MEM_MODE; this mode may be BLKmode if no more suitable mode is
   available.  */

static tree
neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
                          enum machine_mode reg_mode,
                          neon_builtin_type_mode type_mode)
{
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
  tree elem_type, upper_bound, array_type;

  /* Work out the size of the register block in bytes.  */
  reg_size = GET_MODE_SIZE (reg_mode);

  /* Work out the size of each vector in bytes.  */
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);

  /* Work out how many vectors there are.  */
  gcc_assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;

  /* Work out the type of each element.  */
  gcc_assert (POINTER_TYPE_P (type));
  elem_type = TREE_TYPE (type);

  /* Work out how many elements are being loaded or stored.
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
     and memory elements; anything else implies a lane load or store.  */
  if (mem_mode == reg_mode)
    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
  else
    nelems = nvectors;

  /* Create a type that describes the full access.  */
  upper_bound = build_int_cst (size_type_node, nelems - 1);
  array_type = build_array_type (elem_type, build_index_type (upper_bound));

  /* Dereference EXP using that type.  */
  return fold_build2 (MEM_REF, array_type, exp,
                      build_int_cst (build_pointer_type (array_type), 0));
}
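/* As a concrete example: for a vld1q-style load of one 16-byte quad
   register (REG_MODE V4SImode) through a const int * argument,
   MEM_MODE equals REG_MODE, so the access is described as int[4]
   (16 * 1 / 4 elements).  A lane load such as vld2_lane instead has
   MEM_MODE != REG_MODE and accesses one element per vector.  */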
/* Expand a Neon builtin.  */

static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
                      neon_builtin_type_mode type_mode,
                      tree exp, int fcode, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  tree arg_type;
  tree formals;
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode other_mode;
  int argc = 0;
  int opno;

  if (have_retval
      && (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, fcode);

  formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
        break;
      else
        {
          opno = argc + have_retval;
          mode[argc] = insn_data[icode].operand[opno].mode;
          arg[argc] = CALL_EXPR_ARG (exp, argc);
          arg_type = TREE_VALUE (formals);
          if (thisarg == NEON_ARG_MEMORY)
            {
              other_mode = insn_data[icode].operand[1 - opno].mode;
              arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
                                                    mode[argc], other_mode,
                                                    type_mode);
            }

          op[argc] = expand_normal (arg[argc]);

          switch (thisarg)
            {
            case NEON_ARG_COPY_TO_REG:
              /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
              if (!(*insn_data[icode].operand[opno].predicate)
                  (op[argc], mode[argc]))
                op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
              break;

            case NEON_ARG_CONSTANT:
              /* FIXME: This error message is somewhat unhelpful.  */
              if (!(*insn_data[icode].operand[opno].predicate)
                  (op[argc], mode[argc]))
                error ("argument must be a constant");
              break;

            case NEON_ARG_MEMORY:
              gcc_assert (MEM_P (op[argc]));
              PUT_MODE (op[argc], mode[argc]);
              /* ??? arm_neon.h uses the same built-in functions for signed
                 and unsigned accesses, casting where necessary.  This isn't
                 alias safe.  */
              set_mem_alias_set (op[argc], 0);
              if (!(*insn_data[icode].operand[opno].predicate)
                  (op[argc], mode[argc]))
                op[argc] = (replace_equiv_address
                            (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
              break;

            case NEON_ARG_STOP:
              gcc_unreachable ();
            }

          argc++;
          formals = TREE_CHAIN (formals);
        }
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (target, op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (target, op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin. These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant. Instead, the
   required info is looked up in the table neon_builtin_data.  */

static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
  neon_itype itype = d->itype;
  enum insn_code icode = d->code;
  neon_builtin_type_mode type_mode = d->mode;

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_RINT:
    case NEON_SPLIT:
    case NEON_FLOAT_WIDEN:
    case NEON_FLOAT_NARROW:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_MEMORY, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
        NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
        NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
        NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */

void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */

void
neon_emit_pair_result_insn (enum machine_mode mode,
                            rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
                            rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, op2, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */

void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[i];
          operands[2 * i + 1] = src[i];
        }
    }
  else
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[count - i - 1];
          operands[2 * i + 1] = src[count - i - 1];
        }
    }
}
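/* Example: a two-component copy of {d1,d2} into {d0,d1}.  REGNO of the
   destination is below REGNO of the source, so the ascending order
   d0 <- d1 then d1 <- d2 reads each source before it is overwritten;
   for the opposite overlap, {d1,d2} <- {d0,d1}, the descending order
   is chosen for the same reason.  */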
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
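/* Example: combining d1 and d2 into q0 (the pair d0/d1).  operands[2]
   (d2) does not overlap destlo (d0), so d0 <- d1 is emitted before
   d1 <- d2 and no source is clobbered early; had the halves been
   exactly swapped, the single VSWP parallel above would have been
   used instead.  */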
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code    icode;
  tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree              arg0;
  tree              arg1;
  tree              arg2;
  rtx               op0;
  rtx               op1;
  rtx               op2;
  rtx               pat;
  unsigned int      fcode = DECL_FUNCTION_CODE (fndecl);
  size_t            i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;
  int opint;
  int selector;
  int mask;
  int imm;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
               : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
               : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
               : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
               : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
        }

      opint = INTVAL (op1);
      if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
        {
          if (opint > 7 || opint < 0)
            error ("the range of selector should be in 0 to 7");
        }
      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
        {
          if (opint > 3 || opint < 0)
            error ("the range of selector should be in 0 to 3");
        }
      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
        {
          if (opint > 1 || opint < 0)
            error ("the range of selector should be in 0 to 1");
        }

      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WALIGNI:
      /* If op2 is immediate, call waligni, else call walignr.  */
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (CONST_INT_P (op2))
        {
          icode = CODE_FOR_iwmmxt_waligni;
          tmode = insn_data[icode].operand[0].mode;
          mode0 = insn_data[icode].operand[1].mode;
          mode1 = insn_data[icode].operand[2].mode;
          mode2 = insn_data[icode].operand[3].mode;
          if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
            op0 = copy_to_mode_reg (mode0, op0);
          if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
            op1 = copy_to_mode_reg (mode1, op1);
          gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
          selector = INTVAL (op2);
          if (selector > 7 || selector < 0)
            error ("the range of selector should be in 0 to 7");
        }
      else
        {
          icode = CODE_FOR_iwmmxt_walignr;
          tmode = insn_data[icode].operand[0].mode;
          mode0 = insn_data[icode].operand[1].mode;
          mode1 = insn_data[icode].operand[2].mode;
          mode2 = insn_data[icode].operand[3].mode;
          if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
            op0 = copy_to_mode_reg (mode0, op0);
          if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
            op1 = copy_to_mode_reg (mode1, op1);
          if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
            op2 = copy_to_mode_reg (mode2, op2);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
    case ARM_BUILTIN_WMERGE:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
               : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
               : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
               : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (icode == CODE_FOR_iwmmxt_wmerge)
        {
          selector = INTVAL (op2);
          if (selector > 7 || selector < 0)
            error ("the range of selector should be in 0 to 7");
        }
      if ((icode == CODE_FOR_iwmmxt_tinsrb)
          || (icode == CODE_FOR_iwmmxt_tinsrh)
          || (icode == CODE_FOR_iwmmxt_tinsrw))
        {
          mask = 0x01;
          selector = INTVAL (op2);
          if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
            error ("the range of selector should be in 0 to 7");
          else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
            error ("the range of selector should be in 0 to 3");
          else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
            error ("the range of selector should be in 0 to 1");
          mask <<= selector;
          op2 = GEN_INT (mask);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_SETWCGR0:
    case ARM_BUILTIN_SETWCGR1:
    case ARM_BUILTIN_SETWCGR2:
    case ARM_BUILTIN_SETWCGR3:
      icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
               : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
               : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
               : CODE_FOR_iwmmxt_setwcgr3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      pat = GEN_FCN (icode) (op0);
      if (!pat)
        return 0;
      emit_insn (pat);
      return 0;
    case ARM_BUILTIN_GETWCGR0:
    case ARM_BUILTIN_GETWCGR1:
    case ARM_BUILTIN_GETWCGR2:
    case ARM_BUILTIN_GETWCGR3:
      icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
               : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
               : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
               : CODE_FOR_iwmmxt_getwcgr3);
      tmode = insn_data[icode].operand[0].mode;
      if (target == 0
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("mask must be an immediate");
          return const0_rtx;
        }
      selector = INTVAL (op1);
      if (selector < 0 || selector > 255)
        error ("the range of mask should be in 0 to 255");
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WMADDS:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
    case ARM_BUILTIN_WMADDSX:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
    case ARM_BUILTIN_WMADDSN:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
    case ARM_BUILTIN_WMADDU:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
    case ARM_BUILTIN_WMADDUX:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
    case ARM_BUILTIN_WMADDUN:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
    /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
    case ARM_BUILTIN_WQMIABB:
    case ARM_BUILTIN_WQMIABT:
    case ARM_BUILTIN_WQMIATB:
    case ARM_BUILTIN_WQMIATT:
    case ARM_BUILTIN_WQMIABBN:
    case ARM_BUILTIN_WQMIABTN:
    case ARM_BUILTIN_WQMIATBN:
    case ARM_BUILTIN_WQMIATTN:
    case ARM_BUILTIN_WMIABB:
    case ARM_BUILTIN_WMIABT:
    case ARM_BUILTIN_WMIATB:
    case ARM_BUILTIN_WMIATT:
    case ARM_BUILTIN_WMIABBN:
    case ARM_BUILTIN_WMIABTN:
    case ARM_BUILTIN_WMIATBN:
    case ARM_BUILTIN_WMIATTN:
    case ARM_BUILTIN_WMIAWBB:
    case ARM_BUILTIN_WMIAWBT:
    case ARM_BUILTIN_WMIAWTB:
    case ARM_BUILTIN_WMIAWTT:
    case ARM_BUILTIN_WMIAWBBN:
    case ARM_BUILTIN_WMIAWBTN:
    case ARM_BUILTIN_WMIAWTBN:
    case ARM_BUILTIN_WMIAWTTN:
    case ARM_BUILTIN_WSADB:
    case ARM_BUILTIN_WSADH:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
               : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
               : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
               : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
               : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
               : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
               : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
               : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
               : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
               : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
               : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
               : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
               : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
               : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
               : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
               : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
               : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
               : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
               : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
               : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
               : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
               : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
               : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
               : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
               : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
               : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
               : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
               : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
               : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
               : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
               : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
               : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
               : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
               : CODE_FOR_iwmmxt_wsadh);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_WSRLHI:
    case ARM_BUILTIN_WSRLWI:
    case ARM_BUILTIN_WSRLDI:
    case ARM_BUILTIN_WSLLHI:
    case ARM_BUILTIN_WSLLWI:
    case ARM_BUILTIN_WSLLDI:
    case ARM_BUILTIN_WSRAHI:
    case ARM_BUILTIN_WSRAWI:
    case ARM_BUILTIN_WSRADI:
    case ARM_BUILTIN_WRORHI:
    case ARM_BUILTIN_WRORWI:
    case ARM_BUILTIN_WRORDI:
    case ARM_BUILTIN_WSRLH:
    case ARM_BUILTIN_WSRLW:
    case ARM_BUILTIN_WSRLD:
    case ARM_BUILTIN_WSLLH:
    case ARM_BUILTIN_WSLLW:
    case ARM_BUILTIN_WSLLD:
    case ARM_BUILTIN_WSRAH:
    case ARM_BUILTIN_WSRAW:
    case ARM_BUILTIN_WSRAD:
    case ARM_BUILTIN_WRORH:
    case ARM_BUILTIN_WRORW:
    case ARM_BUILTIN_WRORD:
      icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
               : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
               : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
               : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
               : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
               : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
               : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
               : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
               : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
               : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
               : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
               : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
               : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
               : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
               : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
               : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
               : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
               : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
               : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
               : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
               : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
               : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
               : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
               : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
               : CODE_FOR_nothing);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op1 = expand_normal (arg1);
      if (GET_MODE (op1) == VOIDmode)
        {
          imm = INTVAL (op1);
          if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
               || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
              && (imm < 0 || imm > 32))
            {
              if (fcode == ARM_BUILTIN_WRORHI)
                error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_rori_pi16 in code.");
              else if (fcode == ARM_BUILTIN_WRORWI)
                error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_rori_pi32 in code.");
              else if (fcode == ARM_BUILTIN_WRORH)
                error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_ror_pi16 in code.");
              else
                error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_ror_pi32 in code.");
            }
          else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
                   && (imm < 0 || imm > 64))
            {
              if (fcode == ARM_BUILTIN_WRORDI)
                error ("the range of count should be in 0 to 64.  please check the intrinsic _mm_rori_si64 in code.");
              else
                error ("the range of count should be in 0 to 64.  please check the intrinsic _mm_ror_si64 in code.");
            }
          else if (imm < 0)
            {
              if (fcode == ARM_BUILTIN_WSRLHI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srli_pi16 in code.");
              else if (fcode == ARM_BUILTIN_WSRLWI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srli_pi32 in code.");
              else if (fcode == ARM_BUILTIN_WSRLDI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srli_si64 in code.");
              else if (fcode == ARM_BUILTIN_WSLLHI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_slli_pi16 in code.");
              else if (fcode == ARM_BUILTIN_WSLLWI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_slli_pi32 in code.");
              else if (fcode == ARM_BUILTIN_WSLLDI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_slli_si64 in code.");
              else if (fcode == ARM_BUILTIN_WSRAHI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srai_pi16 in code.");
              else if (fcode == ARM_BUILTIN_WSRAWI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srai_pi32 in code.");
              else if (fcode == ARM_BUILTIN_WSRADI)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srai_si64 in code.");
              else if (fcode == ARM_BUILTIN_WSRLH)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srl_pi16 in code.");
              else if (fcode == ARM_BUILTIN_WSRLW)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srl_pi32 in code.");
              else if (fcode == ARM_BUILTIN_WSRLD)
                error ("the count should be no less than 0.  please check the intrinsic _mm_srl_si64 in code.");
              else if (fcode == ARM_BUILTIN_WSLLH)
                error ("the count should be no less than 0.  please check the intrinsic _mm_sll_pi16 in code.");
              else if (fcode == ARM_BUILTIN_WSLLW)
                error ("the count should be no less than 0.  please check the intrinsic _mm_sll_pi32 in code.");
              else if (fcode == ARM_BUILTIN_WSLLD)
                error ("the count should be no less than 0.  please check the intrinsic _mm_sll_si64 in code.");
              else if (fcode == ARM_BUILTIN_WSRAH)
                error ("the count should be no less than 0.  please check the intrinsic _mm_sra_pi16 in code.");
              else if (fcode == ARM_BUILTIN_WSRAW)
                error ("the count should be no less than 0.  please check the intrinsic _mm_sra_pi32 in code.");
              else
                error ("the count should be no less than 0.  please check the intrinsic _mm_sra_si64 in code.");
            }
        }
      return arm_expand_binop_builtin (icode, exp, target);
    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_ternop_builtin (d->icode, exp, target);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
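/* For example, number_of_first_bit_set (0x28) is 3: 0x28 is 101000 in
   binary, and the lowest set bit is bit 3.  */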
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg, insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
        tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
        tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
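/* For instance, MASK = (1 << 4) | (1 << 5) | (1 << LR_REGNUM) emits a
   single "push {r4, r5, lr}" whose parallel pre-decrements SP by 12.
   REAL_REGS controls which registers the unwind notes claim were
   saved; the two sets can differ, e.g. when a work register is pushed
   in place of the register whose value it holds.  */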
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  enum machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
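
/* For illustration only: an interworking return with the return address
   on the stack and a 4-byte return value in r0 leaves r1/r2 as the only
   corruptible argument registers; the matching loop assigns r1 to hold
   the return address, so the emitted sequence is simply

     pop	{r1}
     bx	r1

   leaving the return value in r0 untouched.  */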
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
/* Return nonzero if VAL can be formed by shifting an 8-bit value,
   i.e. all of its set bits fit within some contiguous 8-bit window
   of the low 32 bits.  */
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
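
/* For illustration only: the window scan accepts exactly the constants
   that a Thumb-1 move-then-shift pair can build, e.g.

     thumb_shiftable_const (0x000000ff)  ->  1   (0xff << 0)
     thumb_shiftable_const (0x00ff0000)  ->  1   (0xff << 16)
     thumb_shiftable_const (0x00000101)  ->  0   (set bits span 9 bits)
*/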
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* The far_jump attribute is always true for thumb1 before the
     shorten_branch pass, so checking it before that pass is not
     very useful.

     The following heuristic tries to estimate more accurately whether a
     far jump may finally be used.  The heuristic is very conservative,
     as there is no chance to roll back the decision not to use a far
     jump.

     The Thumb1 long branch offset range is -2048 to 2046.  The worst
     case is that each 2-byte insn is associated with a 4-byte constant
     pool entry.  Using function size 2048/3 as the threshold is
     conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
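
/* For illustration only: (func_size * 3) >= 2048 is equivalent to
   func_size >= 683 (integer division).  With each 2-byte insn possibly
   anchoring a 4-byte constant-pool entry, 683 bytes of insns can
   stretch to roughly 683 * 3 = 2049 bytes of code plus pool, just past
   the 2046-byte reach of a Thumb-1 long branch; hence the cut-off.  */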
/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instructions.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
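
/* For illustration only: with -Os, a 16-byte frame, no high registers
   to save, and r0-r3 free (n_free = 4), amount <= n_free * 4 holds and
   the prologue can emit a single

     push	{r0, r1, r2, r3, r4, lr}

   instead of "push {r4, lr}" followed by "sub sp, #16".  The extra
   slots are dead stores, but the combined push is smaller and usually
   no slower than push-then-subtract.  */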
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the
	 value we are returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
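
/* For illustration only: undoing a prologue that stashed r8/r9 in low
   registers produces an epilogue along the lines of

     pop	{r2, r3}	@ stacked values of r8, r9
     mov	r8, r2
     mov	r9, r3
     pop	{r4, r5, r6, r7, pc}

   High registers are restored via low ones because Thumb-1 push/pop
   cannot address r8-r12 directly.  */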
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   sub   SP, #16         Reserve space for 4 registers.
	 2   push  {R7}            Push low registers.
	 4   add   R7, SP, #20     Get the stack pointer before the push.
	 6   str   R7, [SP, #8]    Store the stack pointer
				     (before reserving the space).
	 8   mov   R7, PC          Get hold of the start of this code + 12.
	 10  str   R7, [SP, #16]   Store it.
	 12  mov   R7, FP          Get hold of the current frame pointer.
	 14  str   R7, [SP, #4]    Store it.
	 16  mov   R7, LR          Get hold of the current return address.
	 18  str   R7, [SP, #12]   Store it.
	 20  add   R7, SP, #16     Point at the start of the
				     backtrace structure.
	 22  mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers, since such a stash could clobber
	 live argument values.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
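
/* For illustration only: for a frame larger than the 508-byte "sub sp"
   immediate range, the constant-pool path above produces something like

     push	{r4, r7, lr}
     ldr	r4, .Lnnn	@ .Lnnn: .word -4096 (label name made up)
     add	sp, sp, r4

   reusing a call-saved low register (r4 here) that was already pushed,
   rather than issuing a chain of subtracts.  */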
/* Generate pattern *pop_multiple_with_stack_update_and_return if a single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are to be returned.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      emit_jump_insn (simple_return_rtx);
    }
}
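
/* For illustration only: for a Thumb-2 function that saved {r4, r5, lr},
   the non-simple path above replaces LR with PC in the mask and the
   whole epilogue folds into one

     pop	{r4, r5, pc}

   while a lone saved LR is returned through a post-increment load of
   the PC (e.g. "ldr pc, [sp], #4").  */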
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers are saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  floats_from_frame += saved_size;
	  emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
				 hard_frame_pointer_rtx,
				 GEN_INT (-floats_from_frame)));
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      emit_insn (gen_addsi3 (stack_pointer_rtx,
			     hard_frame_pointer_rtx,
			     GEN_INT (- 4 * num_regs)));
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
							gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  if (crtl->args.pretend_args_size)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (crtl->args.pretend_args_size)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue for how
	     pretend args are saved on the stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
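
/* For illustration only: for the <reg> + <reg> case with destination
   pair r2/r3, base r0 and offset r1, the emitted sequence is

     add	r3, r0, r1
     ldr	r2, [r3, #0]
     ldr	r3, [r3, #4]

   The address is formed in the high half of the destination so that
   the final load may safely overwrite it.  */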
/* Output a 2- or 3-word block move, copying from the address in
   OPERANDS[1] to the address in OPERANDS[0], with OPERANDS[4] onwards
   as scratch registers.  */
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      if (REGNO (operands[5]) > REGNO (operands[6]))
	{
	  tmp = operands[5];
	  operands[5] = operands[6];
	  operands[6] = tmp;
	}
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
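
/* For illustration only: a three-word copy with scratch regs r4-r6
   comes out as

     ldmia	r1!, {r4, r5, r6}
     stmia	r0!, {r4, r5, r6}

   The swaps above exist only to put the register lists in ascending
   order, as ldmia/stmia require.  */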
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
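
/* For illustration only: a call through r4 becomes, in effect,

     bl	.Lcall_via_r4	@ at the call site
     ...
   .Lcall_via_r4:
     bx	r4		@ stub emitted once per section by arm_file_end

   The label name here is made up; the real label is produced by
   gen_label_rtx and printed via targetm.asm_out.internal_label.  */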
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags (e.g. gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	{
	  const char* pos = strchr (arm_selected_arch->name, '+');

	  if (pos)
	    {
	      char buf[15];
	      gcc_assert (strlen (arm_selected_arch->name)
			  <= sizeof (buf) / sizeof (*pos));
	      strncpy (buf, arm_selected_arch->name,
		       (pos - arm_selected_arch->name) * sizeof (*pos));
	      buf[pos - arm_selected_arch->name] = '\0';
	      asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
	      asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
	    }
	  else
	    asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (TARGET_SOFT_FLOAT)
	{
	  fpu_name = "softvfp";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
		     mi_op, this_regno, this_regno,
		     mi_delta);
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  tem = plus_constant (GET_MODE (tem), tem, -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem), tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
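/* Illustrative only (not taken from real compiler output): for a non-PIC,
   ARM-mode thunk with delta == 4 and the this pointer in r0, the code
   above emits roughly:

	add	r0, r0, #4
	b	<function>(PLT)
*/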
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}

/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
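/* As a sanity check (illustrative, not emitted by the compiler): the HFmode
   constant 1.0 has the IEEE binary16 encoding 0x3c00, so on a little-endian
   target the padded pool entry is the byte sequence 00 3c 00 00.  */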
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}

/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}
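/* For instance (illustrative): under the EABI an expression such as
   "new int[n]" reserves 8 extra bytes before the array, and because
   arm_cookie_has_size returns true the cookie records the element size
   in addition to the element count used by the generic C++ ABI.  */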
/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}
/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */
static bool
arm_array_mode_supported_p (enum machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static enum machine_mode
arm_preferred_simd_mode (enum machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
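/* Illustrative consequence of the 255/0 mask above: for a variable SImode
   shift the middle-end may drop an explicit (count & 255) masking, since
   e.g. a shift count of 257 already behaves as a shift by 1; DImode shifts
   receive no such guarantee.  */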
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
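/* Worked examples of the arithmetic above: s0 (regno FIRST_VFP_REGNUM)
   maps to DWARF register 64 and s31 to 95 (the legacy VFPv2 range), while
   d16, which has no single-precision alias, maps to 256 + 32/2 = 272.  */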
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  enum machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || !REG_P (XEXP (e, 0))
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	}
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || !MEM_P (XEXP (e, 0))
	  || !REG_P (XEXP (e, 1)))
	abort ();

      reg = REGNO (XEXP (e, 1));
      /* Registers must be stored in increasing order.  */
      if (reg < lastreg)
	abort ();
      lastreg = reg;

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (!REG_P (XEXP (e, 0))
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e, 1))
	      || offset != INTVAL (XEXP (e, 1)))
	    abort ();
	}
      else if (i != 1
	       || !REG_P (e)
	       || REGNO (e) != SP_REGNUM)
	abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_DEF_CFA:
	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT(val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  enum machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
	|| ((opmode == V2SImode) && (shift > 31))
	|| ((opmode == DImode) && (shift > 63)))
  {
    if (wror_or_wsra)
      {
	sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	output_asm_insn (templ, operands);
	if (opmode == DImode)
	  {
	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	    output_asm_insn (templ, operands);
	  }
      }
    else
      {
	/* The destination register will contain all zeros.  */
	sprintf (templ, "wzero\t%%0");
	output_asm_insn (templ, operands);
      }
    return "";
  }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}

/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	{
	  break;
	}
      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
	break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V4HFmode,  "__builtin_neon_hf",     "18__simd64_float16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },

  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
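/* For example (illustrative): given the table above, a prototype
   "void f (int8x8_t);" whose parameter is built from NEON element types
   mangles as _Z1f15__simd64_int8_t; the leading "15" is the usual
   Itanium-style length prefix carried in the aapcs_name strings.  */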
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
tree
arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
     decl of the vectorized builtin for the appropriate vector mode.
     NULL_TREE is returned if no such builtin is available.  */
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C) \
  (out_mode == SFmode && out_n == C \
   && in_mode == SFmode && in_n == C)

#undef ARM_FIND_VRINT_VARIANT
#define ARM_FIND_VRINT_VARIANT(N) \
  (ARM_CHECK_BUILTIN_MODE (2) \
    ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
    : (ARM_CHECK_BUILTIN_MODE (4) \
      ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
      : NULL_TREE))

  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
    {
      enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
      switch (fn)
	{
	case BUILT_IN_FLOORF:
	  return ARM_FIND_VRINT_VARIANT (vrintm);
	case BUILT_IN_CEILF:
	  return ARM_FIND_VRINT_VARIANT (vrintp);
	case BUILT_IN_TRUNCF:
	  return ARM_FIND_VRINT_VARIANT (vrintz);
	case BUILT_IN_ROUNDF:
	  return ARM_FIND_VRINT_VARIANT (vrinta);
	default:
	  return NULL_TREE;
	}
    }
  return NULL_TREE;
}
#undef ARM_CHECK_BUILTIN_MODE
#undef ARM_FIND_VRINT_VARIANT
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
)
29647 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29648 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
29649 and code size can be reduced. */
29650 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
29656 /* Compute the atrribute "length" of insn "*push_multi".
29657 So this function MUST be kept in sync with that insn pattern. */
29659 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
29661 int i
, regno
, hi_reg
;
29662 int num_saves
= XVECLEN (parallel_op
, 0);
29672 regno
= REGNO (first_op
);
29673 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29674 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
29676 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
29677 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}

int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}

int
vfp3_const_double_for_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_truncate (DFmode, &r0))
    {
      HOST_WIDE_INT value = real_to_integer (&r0);
      value = value & 0xffffffff;
      if ((value != 0) && ( (value & (value - 1)) == 0))
	return int_log2 (value);
    }

  return 0;
}
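/* Worked example for vfp3_const_double_for_fract_bits (illustrative):
   for the CONST_DOUBLE 0.125 the exact inverse is 8.0, which truncates
   exactly to the power of two 8, so the function returns int_log2 (8) == 3,
   i.e. a fixed-point conversion with 3 fractional bits.  */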
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
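/* With GCC's REG_BR_PROB_BASE of 10000 the note value works out to
   10000/100 - 1 == 99, i.e. the branch is predicted taken roughly 1%
   of the time (illustrative arithmetic).  */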
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
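/* Illustrative only (register numbers arbitrary): for a strong SImode
   compare-and-swap with relaxed failure ordering, the split above yields
   a loop of roughly this shape:

	1:	ldrex	r0, [r3]	@ rval = *mem
		cmp	r0, r1		@ rval == oldval?
		bne	2f		@ fail: Z clear
		strex	r2, r4, [r3]	@ try *mem = newval
		cmp	r2, #0
		bne	1b		@ lost the reservation: retry
	2:
*/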
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
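/* E.g. for a single-vector V8QImode permute the mask built above is a
   vector of 7s, so an out-of-range selector such as 9 is reduced to
   9 & 7 == 1 before the vtbl expansion in arm_expand_vec_perm_1
   (illustrative).  */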
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
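/* Example of the pattern matched above (for illustration): for V4SI with
   two distinct operands, odd == 0 accepts perm = { 0, 2, 4, 6 } (the
   even-numbered elements of both vectors) and odd == 1 accepts
   perm = { 1, 3, 5, 7 }, which are exactly the two outputs of a VUZP.32
   of the operand pair.  */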
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
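/* Example of the pattern matched above (for illustration): for V4SI with
   two distinct operands, high == 0 accepts perm = { 0, 4, 1, 5 }
   (interleaving the low halves of the two vectors) and high == nelt / 2
   accepts perm = { 2, 6, 3, 7 } (interleaving the high halves), the two
   outputs of a VZIP.32.  */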
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev64v16qi; break;
        case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev32v16qi; break;
        case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev16v16qi; break;
        case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
        case V4SImode:  gen = gen_neon_vrev64v4si;  break;
        case V2SImode:  gen = gen_neon_vrev64v2si;  break;
        case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
        case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* ??? The third operand is an artifact of the builtin infrastructure
     and is ignored by the actual instruction.  */
  emit_insn (gen (d->target, d->op0, const0_rtx));
  return true;
}
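/* Example of the pattern matched above (for illustration): diff is taken
   from perm[0], so a V8HI permutation { 3, 2, 1, 0, 7, 6, 5, 4 } has
   diff == 3 and reverses the four 16-bit elements within each 64-bit
   half, which is exactly what VREV64.16 does.  */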
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
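/* Example of the pattern matched above (for illustration): for V4SI with
   two distinct operands, odd == 0 accepts perm = { 0, 4, 2, 6 } and
   odd == 1 accepts perm = { 1, 5, 3, 7 }, the two outputs of a VTRN.32
   transpose step.  */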
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        break;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
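/* Example of the pattern matched above (for illustration): for V8QI,
   perm = { 3, 4, 5, 6, 7, 8, 9, 10 } selects a window starting at element
   3 that runs across the operand boundary, so location == 3 and the
   permutation maps onto a VEXT.8 with a #3 immediate.  */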
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
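/* Note (for illustration): VTBL writes zero to any lane whose selector
   byte is outside the table, whereas VEC_PERM_EXPR requires out-of-range
   indexes to wrap modulo the element count.  With a constant selector the
   wrap can be folded into the constant here, which is why this path
   avoids the runtime AND that arm_expand_vec_perm must emit.  */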
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
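/* For example (illustration only): with nelt == 4, a permutation
   { 4, 5, 6, 7 } begins in the second operand; adding nelt modulo
   2 * nelt rewrites it to { 0, 1, 2, 3 }, and the accompanying operand
   swap keeps the overall selection unchanged while letting the matchers
   above see a leading 0.  */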
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
/* Return true if the auto-increment form CODE can be used with MODE
   addresses.  */
bool
arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
         instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }
}
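/* For instance (illustration only): ARM_POST_INC for SImode corresponds
   to an addressing form such as "ldr r0, [r1], #4", which exists for core
   loads and stores, while a vector mode has no single-instruction
   pre-decrement equivalent; that asymmetry is what the checks above
   encode.  */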
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
                               rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
       in = the register pair containing the input value.
       out = the destination register pair.
       up = the high- or low-part of each pair.
       down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up    = code == ASHIFT ? in_low : in_high;
  rtx in_down  = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
              && (REG_P (out) || GET_CODE (out) == SUBREG)
              && GET_MODE (out) == DImode);
  gcc_assert (in
              && (REG_P (in) || GET_CODE (in) == SUBREG)
              && GET_MODE (in) == DImode);
  gcc_assert (amount
              && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
                   && GET_MODE (amount) == SImode)
                  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
              || (GET_CODE (scratch1) == SCRATCH)
              || (GET_MODE (scratch1) == SImode
                  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
              || (GET_CODE (scratch2) == SCRATCH)
              || (GET_MODE (scratch2) == SImode
                  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
              || !HARD_REGISTER_P (out)
              || (REGNO (out) != REGNO (amount)
                  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
            gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
            gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
            gen_addsi3_compare0 ((DEST), (SRC), \
                                 GEN_INT (-32))
  #define SET(DEST,SRC) \
            gen_rtx_SET (SImode, (DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
                            SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
                            SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
            gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
            gen_arm_cond_branch ((LABEL), \
                                 gen_rtx_ ## COND (CCmode, cc_reg, \
                                                   const0_rtx), \
                                 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result an ARM instruction in a
         shift-by-register would give.  This helps reduce execution
         differences between optimization levels, but it won't stop other
         parts of the compiler doing different things.  This is "undefined
         behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
        emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
        {
          if (code == ASHIFTRT)
            {
              rtx const31_rtx = GEN_INT (31);
              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
            }
          else
            emit_insn (gen_movdi (out, const0_rtx));
        }

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
        {
          /* Shifts by a constant less than 32.  */
          rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
          emit_insn (SET (out_down,
                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
                               out_down)));
          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
        }
      else
        {
          /* Shifts by a constant greater than 31.  */
          rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
          if (code == ASHIFTRT)
            emit_insn (gen_ashrsi3 (out_up, in_up,
                                    GEN_INT (31)));
          else
            emit_insn (SET (out_up, const0_rtx));
        }
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
         Swapping them around now allows the later code to be more general.  */
      switch (code)
        {
        case ASHIFT:
          emit_insn (SUB_32 (scratch1, amount));
          emit_insn (RSB_32 (scratch2, amount));
          break;
        case ASHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          /* Also set CC = amount > 32.  */
          emit_insn (SUB_S_32 (scratch2, amount));
          break;
        case LSHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          emit_insn (SUB_32 (scratch2, amount));
          break;
        default:
          gcc_unreachable ();
        }

      /* Emit code like this (using ARM shift-by-register semantics, where
         out-of-range shift amounts give zero for LSL/LSR and sign-fill
         for ASR):

         ASHIFT:
            out_down = in_down << amount;
            out_down = (in_up << (amount - 32)) | out_down;
            out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
            out_up = in_up << amount;

         ASHIFTRT:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount >= 32)
              out_down = ((signed)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         LSHIFTRT:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount >= 32)
              out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         The ARM and Thumb2 variants are the same but implemented slightly
         differently.  If this were only called during expand we could just
         use the Thumb2 case and let combine do the right thing, but this
         can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
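/* Worked example (illustration only): for an ASHIFT of DImode by the
   constant 40, the amount is >= 32, so adj_amount == 8 and the
   shift-by-constant path above emits

       out_high = in_low << 8;
       out_low  = 0;

   which is a full 64-bit left shift by 40 done in two core registers.  */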
/* Return true if *COMPARISON is a valid comparison operation, and
   force the operands into a form that the comparison patterns accept.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
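/* Illustrative note: 1 << 29 is 0x20000000, so with AddressSanitizer's
   usual 8-to-1 shadow scaling this places shadow memory at
   shadow = (addr >> 3) + 0x20000000 on this target.  */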
#include "gt-arm.h"