/* Output routines for GCC for ARM.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_lra_p (void);
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *,
				   bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *,
				   bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *,
				  bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t,
				   reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
					unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
					     const unsigned char *sel);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};

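/* Illustrative usage (added for reference; not in the original sources):
   these attributes are attached to declarations in user code, e.g.

     void handler (void) __attribute__ ((isr ("IRQ")));
     extern int helper (int) __attribute__ ((long_call));

   where `handler' and `helper' are hypothetical names.  */
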
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_LRA_P
#define TARGET_LRA_P arm_lra_p

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

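/* Worked check of the comment above (added for clarity; not in the original
   sources): the block spans offsets -4088 through +4095 inclusive, i.e.
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, so anchors fall on a
   natural eight-byte spacing.  */
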
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
/* spare             (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */
#define FL_CRC32      (1 << 25)       /* ARMv8 CRC32 instructions.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)

#define FL_FOR_ARCH2      FL_NOTM
#define FL_FOR_ARCH3      (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M     (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4      (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T     (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5      (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T     (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E     (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE    (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ   FL_FOR_ARCH5TE
#define FL_FOR_ARCH6      (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J     FL_FOR_ARCH6
#define FL_FOR_ARCH6K     (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z     FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK    FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2    (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M     (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7      ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A     (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7VE    (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
#define FL_FOR_ARCH7R     (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M     (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM    (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A     (FL_FOR_ARCH7VE | FL_ARCH8)

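/* For illustration (added for reference; not in the original sources):
   chaining the macros above, FL_FOR_ARCH5TE expands to
     FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
     | FL_THUMB
   so each architecture inherits the capability bits of its predecessors.  */
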
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

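/* Illustrative expansion (added for reference; not in the original sources):
   these macros supply the three prefetch-related fields of a tuning table in
   one go, so ARM_PREFETCH_BENEFICIAL(4,32,32) expands to `4, 32, 32'
   (prefetch slots, L1 cache size, L1 cache line size), as used by the
   Cortex-A9 and Cortex-A12 entries below.  */
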
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"

const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */

  /* MULT SImode */
  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */

  /* MULT DImode */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */

  /* LD/ST */
  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */

  /* FP SFmode */
  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  /* FP DFmode */
  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  /* Vector */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  true			/* non_exec_costs_exec.  */

  /* MULT SImode */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */

  /* MULT DImode */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */

  /* LD/ST */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */

  /* FP SFmode */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  /* FP DFmode */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  /* Vector */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  true			/* non_exec_costs_exec.  */

  /* MULT SImode */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */

  /* MULT DImode */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  /* LD/ST */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0			/* store_unaligned.  */

  /* FP SFmode */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  /* FP DFmode */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  /* Vector */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */

  /* MULT SImode */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */

  /* MULT DImode */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  /* LD/ST */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0			/* store_unaligned.  */

  /* FP SFmode */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  /* FP DFmode */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  /* Vector */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */

  /* MULT SImode */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */

  /* MULT DImode */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  /* LD/ST */
  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */

  /* FP SFmode */
  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */

  /* FP DFmode */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */

  /* Vector */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  NULL,						/* Sched adj cost.  */
  3,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  NULL,
  xscale_sched_adjust_cost,
  2,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  &generic_extra_costs,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a7_tune =
{
  arm_9e_rtx_costs,
  &cortexa7_extra_costs,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  &cortexa15_extra_costs,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,						/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a53_tune =
{
  arm_9e_rtx_costs,
  &cortexa53_extra_costs,
  NULL,						/* Scheduler cost adjustment.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a57_tune =
{
  arm_9e_rtx_costs,
  &cortexa57_extra_costs,
  NULL,						/* Scheduler cost adjustment.  */
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,						/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a12_tune =
{
  arm_9e_rtx_costs,
  &cortexa12_extra_costs,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,						/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  arm_9e_rtx_costs,
  &v7m_extra_costs,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  NULL,
  fa726te_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static HOST_WIDE_INT
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
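/* A worked example (illustrative only): for VALUE == 0x28 (binary 101000)
   the loop above iterates twice, clearing 0x08 and then 0x20, so bit_count
   returns 2.  */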
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
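/* For example (illustrative only): arm_set_fixed_optab_libfunc (ssadd_optab,
   SAmode, "ssadd", "sa", 3) registers the libcall name "__gnu_ssaddsa3",
   matching the __gnu_-prefixed fixed-point helpers provided by libgcc.  */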
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
			    enum machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     default AAPCS-based ABI.  */
  if (!TARGET_AAPCS_BASED)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
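  /* Illustrative sketch (not from the original sources): a call such as

	bl	__aeabi_idivmod		@ dividend in r0, divisor in r1

     returns the quotient in r0 and the remainder in r1, so a plain
     division simply ignores r1, and a modulus operation ignores r0.  */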
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ TImode, "ti" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
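/* In C terms (an informal sketch, not from the original sources), the AAPCS
   branch above rewrites VALIST into the expression "valist.__ap", whose
   type is "void *".  */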
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  if (TARGET_LITTLE_WORDS)
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
	     "will be removed in a future release");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
  if (arm_restrict_it == 2)
    arm_restrict_it = arm_arch8 && TARGET_THUMB2;

  if (!TARGET_THUMB2)
    arm_restrict_it = 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
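  /* Checking the arithmetic (illustrative, not from the original sources):
     offsets -248 through 4095 inclusive cover 248 + 1 + 4095 == 4344 bytes,
     and 4344 == 8 * 543, so consecutive anchors stay 8-byte aligned.  */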
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");
  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    /* If optimizing for size, bump the number of instructions that we
       are prepared to conditionally execute (even on a StrongARM).  */
    max_insns_skipped = 6;
  else
    max_insns_skipped = current_tune->max_insns_skipped;

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
     it beneficial (signified by setting num_prefetch_slots to 1 or more.)  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not override the
     defaults unless we are tuning for a core we have researched values for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->num_prefetch_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
    flag_shrink_wrap = false;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc, #0]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
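/* Illustrative layout (not from the original sources): for a 32-bit target
   the initialized trampoline is four words, the two instruction words
   emitted by arm_asm_trampoline_template followed by

	offset  8: static chain value
	offset 12: target function address

   which is why the adjust_address calls above patch offsets 8 and 12.  */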
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
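/* Worked examples (illustrative only): in ARM mode 0x000003FC is valid
   (0xFF rotated right by 30, an even rotation), while 0x000001FE is not,
   since it would need the odd rotation 31.  In Thumb-2 mode 0x00FF00FF and
   0xFF00FF00 are valid as replicated byte patterns.  */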
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */

    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
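/* For example (illustrative only): with CODE == PLUS and I == -255, the
   constant itself fails const_ok_for_arm, but -I == 255 succeeds, so the
   operation can be emitted as a subtraction instead
   (add r0, r1, #-255 becomes sub r0, r1, #255).  */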
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  if (TARGET_THUMB1)
    return 0;

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	      && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref:  gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
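/* Illustrative example (not from the original sources): on a core without
   MOVW/MOVT, a SET of the constant 0x00FF00FF cannot be done in one insn,
   so arm_gen_constant would synthesize it as

	mov	rD, #255		@ 0x000000FF
	orr	rD, rD, #16711680	@ 0x00FF0000

   two insns, each using a valid 8-bit rotated immediate.  */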
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
					 best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 only require one insn.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
3652 /* As above, but extra parameter GENERATE which, if clear, suppresses
3656 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3657 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3662 int final_invert
= 0;
3664 int set_sign_bit_copies
= 0;
3665 int clear_sign_bit_copies
= 0;
3666 int clear_zero_bit_copies
= 0;
3667 int set_zero_bit_copies
= 0;
3668 int insns
= 0, neg_insns
, inv_insns
;
3669 unsigned HOST_WIDE_INT temp1
, temp2
;
3670 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3671 struct four_ints
*immediates
;
3672 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (VOIDmode, target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }
  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zeros.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}
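      /* Worked illustration (hypothetical registers rT and rD standing
	 for the subtarget and target): for VAL == 0xfffff000,
	 set_sign_bit_copies == 20 and remainder << 19 == 0x80000000, a
	 valid immediate, so the two-insn sequence is
		mov	rT, #0x80000000
		asr	rD, rT, #19  */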
      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}
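      /* Worked illustration (constant chosen for exposition): for
	 remainder == 0x00ffff00, topshift is 8 and the rounding above
	 yields temp1 == 0x01000000 and temp2 == 0x100, both valid
	 immediates, giving
		mov	rT, #0x01000000
		sub	rD, rT, #0x100  */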
      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (VOIDmode, target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (VOIDmode, target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;
    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, sub,
						   GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /* Convert
	 x = y | constant (which is composed of set_sign_bit_copies
	 leading 1s and 0s for the remainder, e.g. 0xfff00000)
	 to
	 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).

	 This can be done in 2 instructions by using shifts with mov or mvn.
	 E.g. for
	 x = x | 0xfff00000;
	 we generate
	 mvn	r0, r0, asl #12
	 mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	 x = y | constant (which has set_zero_bit_copies number of trailing ones)
	 to
	 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	 E.g. for r0 = r0 | 0xfff
	 mvn	r0, r0, lsr #12
	 mvn	r0, r0, asl #12  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (VOIDmode, target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (VOIDmode, target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;
    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond,
			      gen_rtx_SET (VOIDmode, new_src,
					   temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  enum machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      rtx tem = *op0;
	      *op0 = *op1;
	      *op1 = tem;
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
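/* Worked illustration (hypothetical): for the comparison r0 > 0xffff,
   neither 0xffff nor -0xffff is a valid immediate, but 0x10000 is, so
   the comparison is rewritten as r0 >= 0x10000 and a single
   cmp r0, #65536 suffices.  */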
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : typed_noop_remove <rtx_def>
{
  typedef rtx_def value_type;
  typedef rtx_def compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
  static inline void remove (value_type *);
};

inline bool
libcall_hasher::equal (const value_type *p1, const compare_type *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const value_type *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type htab, rtx libcall)
{
  *htab.find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type libcall_htab;

  if (!init_done)
    {
      init_done = true;

      libcall_htab.create (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
    }

  return libcall && libcall_htab.find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (enum machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_VFP
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
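/* Illustrative examples under the APCS rules above (hypothetical
   types): struct { int a; } is 'integer like' with a single
   addressable element at offset zero, so it is returned in r0, while
   struct { float f; } fails the float check and is returned via
   memory.  */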
static const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
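/* Typical usage of the attribute parsed above, for illustration:

     double f (double) __attribute__((pcs("aapcs")));

   which selects the base (soft-float) variant for F even when the
   target default is aapcs-vfp.  */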
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       enum machine_mode mode, const_tree type,
				       enum machine_mode *base_mode, int *count)
{
  enum machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       enum machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  enum machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    enum machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       enum machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      enum machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call.  This routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
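/* For illustration, AAPCS_CP(vfp) above expands to the six VFP hooks
   defined earlier in this file:

     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg, aapcs_vfp_advance }  */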
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (enum machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
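/* A worked illustration of rule C3 above (hypothetical signature):
   for f (int a, double b), A takes r0 and leaves NCRN == 1; B needs
   doubleword alignment, so NCRN is rounded up to 2 and B is passed in
   the even pair r2/r3, leaving r1 unused.  */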
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return true if we use LRA instead of reload pass.  */
static bool
arm_lra_p (void)
{
  return arm_lra_flag;
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
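/* For illustration: on AAPCS targets PARM_BOUNDARY is 32, while DImode
   and DFmode are 64-bit aligned, so arguments of those modes (and of
   types such as long long) take the doubleword-alignment path above.  */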
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
	  ? DOUBLEWORD_ALIGNMENT
	  : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__((long call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
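/* Typical usage interacting with the rules above, for illustration:

     extern void far_func (void) __attribute__((long_call));

   forces an indirect (register) call to FAR_FUNC even without
   -mlong-calls.  */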
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* Cannot tail-call to long calls, since these are out of range of
     a branch instruction.  */
  if (decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
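
/* Illustration (editor's note): under -fPIC the address of a global,
   e.g. the SYMBOL_REF for "&some_global" or the CONST for
   "&some_global + 4", must be loaded through the GOT and is therefore
   rejected here; a plain CONST_INT such as 42 mentions no symbol and
   remains a legitimate immediate.  */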
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx seq, insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF
               && SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
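
/* Sketch of the GOT-load shape produced above (editor's illustration,
   not authoritative): for a global "x", gen_calculate_pic_address emits
   roughly

       (set (reg T)
            (mem (plus (reg PIC) (unspec [(symbol_ref "x")] UNSPEC_PIC_SYM))))

   i.e. one read-only, non-trapping load relative to
   cfun->machine->pic_reg, with a REG_EQUAL note recording the original
   symbol so later passes can still recognize it.  */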
/* Find a spare register to use during the prolog of a function.  */

static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg--)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
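
/* Worked example (editor's illustration): for a variadic function such as
   "int f (int a, ...)" only r0 carries a fixed argument, so the prologue
   pushes r1-r3 as the anonymous-argument save area
   (crtl->args.pretend_args_size > 0).  r3 is therefore known to be safely
   clobberable during the prologue and is returned above as the work
   register.  */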
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */
void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
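
/* The emitted sequence corresponds roughly to (ARM state, editor's
   illustration):

       ldr     rN, .LCx          @ rN = sym - (.LPICn + 8)
   .LPICn:
       add     rN, pc, rN        @ pc reads as .LPICn + 8, so rN = &sym

   which is why the UNSPEC above folds the 'dot + 8' (or 'dot + 4' for
   Thumb) pc bias into the constant.  */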
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return 0;
}
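
/* For instance (editor's illustration), an rtx of the form
   (minus (symbol_ref "sym") (label_ref L)) assembles to "sym - .L", a
   difference the assembler can resolve with a pc-relative relocation, so
   it needs no GOT entry even when generating PIC.  */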
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fix up invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && !CONST_INT_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
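
/* Summary of the forms accepted above (editor's illustration, ARM state):

       [rN]                       plain base register
       [rN, #imm]                 base plus immediate index
       [rN, rM]                   base plus register index
       [rN, rM, lsl #k]           base plus scaled register
       [rN], #imm / [rN, #imm]!   post-/pre-indexed forms

   plus, after reload, minipool entries addressed via a LABEL_REF.  */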
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32 bits and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
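
/* The ranges above mirror the 16-bit Thumb load/store encodings (editor's
   illustration): ldrb/strb take offsets #0..31, ldrh/strh take even
   offsets #0..62, and ldr/str take word-aligned offsets #0..124.  E.g.
   for SImode, val = 124 passes (124 + 4 <= 128) while val = 128 does
   not.  */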
static bool
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != 0);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
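
/* Rough shape of the sequences chosen above (editor's summary):
   global- and local-dynamic go through a call to __tls_get_addr with a
   pc-relative UNSPEC_TLS argument (or a TLS descriptor sequence under
   TARGET_GNU2_TLS); initial-exec loads the variable's offset from the
   GOT and adds the thread pointer obtained via arm_load_tp; local-exec
   adds a link-time TPOFF constant directly to the thread pointer.  */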
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
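
/* Worked example of the constant split above (editor's illustration):
   for an SImode access to absolute address 0x12345, bits = 12 gives
   mask = 0xfff, base = 0x12000 and index = 0x345, so we emit roughly

       mov     rB, #0x12000
       ldr     r0, [rB, #0x345]

   letting neighbouring absolute accesses reuse rB.  (bit_count (0x12000)
   is 2, below the (32 - 12)/2 threshold, so the negative-index variant
   is not taken here.)  */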
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
bool
arm_legitimize_reload_address (rtx *p,
                               enum machine_mode mode,
                               int opnum, int type,
                               int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && REG_P (XEXP (XEXP (*p, 0), 0))
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  if (GET_CODE (*p) == PLUS
      && REG_P (XEXP (*p, 0))
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
      /* If the base register is equivalent to a constant, let the generic
         code handle it.  Otherwise we will run into problems if a future
         reload pass decides to rematerialize the constant.  */
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
      HOST_WIDE_INT low, high;

      /* Detect coprocessor load/stores.  */
      bool coproc_p = ((TARGET_HARD_FLOAT
                        && TARGET_VFP
                        && (mode == SFmode || mode == DFmode))
                       || (TARGET_REALLY_IWMMXT
                           && VALID_IWMMXT_REG_MODE (mode))
                       || (TARGET_NEON
                           && (VALID_NEON_DREG_MODE (mode)
                               || VALID_NEON_QREG_MODE (mode))));

      /* For some conditions, bail out when lower two bits are unaligned.  */
      if ((val & 0x3) != 0
          /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
          && (coproc_p
              /* For DI, and DF under soft-float: */
              || ((mode == DImode || mode == DFmode)
                  /* Without ldrd, we use stm/ldm, which does not
                     fare well with unaligned bits.  */
                  && (!TARGET_LDRD
                      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
                      || TARGET_THUMB2))))
        return false;

      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
         of which the (reg+high) gets turned into a reload add insn,
         we try to decompose the index into high/low values that can often
         also lead to better reload CSE.
         For example:
                 ldr r0, [r2, #4100]   // Offset too large
                 ldr r1, [r2, #4104]   // Offset too large

         is best reloaded as:
                 add t1, r2, #8192
                 ldr r0, [t1, #-4092]
                 add t2, r2, #8192
                 ldr r1, [t2, #-4088]

         which post-reload CSE can simplify in most cases to eliminate the
         second add instruction:
                 add t1, r2, #8192
                 ldr r0, [t1, #-4092]
                 ldr r1, [t1, #-4088]

         The idea here is that we want to split out the bits of the constant
         as a mask, rather than as subtracting the maximum offset that the
         respective type of load/store used can handle.

         When encountering negative offsets, we can still utilize it even if
         the overall offset is positive; sometimes this may lead to an
         immediate that can be constructed with fewer instructions.
         For example:
                 ldr r0, [r2, #0x3FFFFC]

         This is best reloaded as:
                 add t1, r2, #0x400000
                 ldr r0, [t1, #-4]

         The trick for spotting this for a load insn with N bits of offset
         (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
         negative offset that is going to make bit N and all the bits below
         it become zero in the remainder part.

         The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
         to sign-magnitude addressing (i.e. separate +- bit, or 1's
         complement), used in most cases of ARM load/store instructions.  */

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)                                  \
  (((VAL) & ((1 << (N)) - 1))                                           \
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))       \
   : 0)
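
/* Worked examples of the macro (editor's illustration):

     SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
       = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
       = (0x1FFC ^ 0x1000) - 0x1000 = 0xFFC - 0x1000 = -4

   so val 0x3FFFFC splits as high = 0x400000, low = -4, matching the
   second example in the comment above.  Conversely,

     SIGN_MAG_LOW_ADDR_BITS (0x30, 12) = 0x30

   since bit 12 is clear, the value is returned unchanged as the low
   part.  */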
      if (coproc_p)
        {
          low = SIGN_MAG_LOW_ADDR_BITS (val, 10);

          /* NEON quad-word load/stores are made of two double-word accesses,
             so the valid index range is reduced by 8.  Treat as 9-bit range
             if we go over it.  */
          if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
            low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
        }
      else if (GET_MODE_SIZE (mode) == 8)
        {
          if (TARGET_LDRD)
            low = (TARGET_THUMB2
                   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
                   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
          else
            /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
               to access doublewords.  The supported load/store offsets are
               -8, -4, and 4, which we try to produce here.  */
            low = ((val & 0xf) ^ 0x8) - 0x8;
        }
      else if (GET_MODE_SIZE (mode) < 8)
        {
          /* NEON element load/stores do not have an offset.  */
          if (TARGET_NEON_FP16 && mode == HFmode)
            return false;

          if (TARGET_THUMB2)
            {
              /* Thumb-2 has an asymmetrical index range of (-256,4096).
                 Try the wider 12-bit range first, and re-try if the result
                 is out of range.  */
              low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
              if (low < -255)
                low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
            }
          else
            {
              if (mode == HImode || mode == HFmode)
                {
                  if (arm_arch4)
                    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
                  else
                    {
                      /* The storehi/movhi_bytes fallbacks can use only
                         [-4094,+4094] of the full ldrb/strb index range.  */
                      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
                      if (low == 4095 || low == -4095)
                        return false;
                    }
                }
              else
                low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
            }
        }
      else
        return false;

      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
               ^ (unsigned HOST_WIDE_INT) 0x80000000)
              - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero.  */
      if (low == 0 || high == 0 || (high + low != val))
        return false;

      /* Reload the high part into a base reg; leave the low part
         in the mem.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      *p = gen_rtx_PLUS (GET_MODE (*p),
                         gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
                                       GEN_INT (high)),
                         GEN_INT (low));

      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
bool
thumb_legitimize_reload_address (rtx *x_p,
                                 enum machine_mode mode,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
/* Test for various thread-local symbols.  */

/* Helper for arm_tls_referenced_p.  */
static int
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return TRUE if X contains any TLS symbol references.  */
bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
{
  /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in the future.  */
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)                                            \
  (REG_P (X)                                                            \
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)                    \
  (REG_P (X) ? (X) : SUBREG_REG (X))

static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
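
/* A note on units (editor's illustration): COSTS_N_INSNS (N) expands to
   N * 4 in rtl.h, so the bare "+ 4 * ..." terms in the SET and MEM cases
   above effectively charge one extra average instruction per memory
   operand involved, biasing the costs toward register-to-register
   forms.  */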
8001 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8003 enum machine_mode mode
= GET_MODE (x
);
8004 enum rtx_code subcode
;
8006 enum rtx_code code
= GET_CODE (x
);
8012 /* Memory costs quite a lot for the first word, but subsequent words
8013 load at the equivalent of a single insn each. */
8014 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8021 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8022 *total
= COSTS_N_INSNS (2);
8023 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8024 *total
= COSTS_N_INSNS (4);
8026 *total
= COSTS_N_INSNS (20);
8030 if (REG_P (XEXP (x
, 1)))
8031 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8032 else if (!CONST_INT_P (XEXP (x
, 1)))
8033 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8039 *total
+= COSTS_N_INSNS (4);
8044 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8045 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8048 *total
+= COSTS_N_INSNS (3);
8052 *total
+= COSTS_N_INSNS (1);
8053 /* Increase the cost of complex shifts because they aren't any faster,
8054 and reduce dual issue opportunities. */
8055 if (arm_tune_cortex_a9
8056 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8064 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8065 if (CONST_INT_P (XEXP (x
, 0))
8066 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8068 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8072 if (CONST_INT_P (XEXP (x
, 1))
8073 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8075 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8082 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8084 if (TARGET_HARD_FLOAT
8086 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8088 *total
= COSTS_N_INSNS (1);
8089 if (CONST_DOUBLE_P (XEXP (x
, 0))
8090 && arm_const_double_rtx (XEXP (x
, 0)))
8092 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8096 if (CONST_DOUBLE_P (XEXP (x
, 1))
8097 && arm_const_double_rtx (XEXP (x
, 1)))
8099 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8105 *total
= COSTS_N_INSNS (20);
8109 *total
= COSTS_N_INSNS (1);
8110 if (CONST_INT_P (XEXP (x
, 0))
8111 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8113 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8117 subcode
= GET_CODE (XEXP (x
, 1));
8118 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8119 || subcode
== LSHIFTRT
8120 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8122 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8123 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8127 /* A shift as a part of RSB costs no more than RSB itself. */
8128 if (GET_CODE (XEXP (x
, 0)) == MULT
8129 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8131 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8132 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8137 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8139 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8140 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8144 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8145 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8147 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8148 if (REG_P (XEXP (XEXP (x
, 1), 0))
8149 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8150 *total
+= COSTS_N_INSNS (1);
8158 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8159 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8160 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8162 *total
= COSTS_N_INSNS (1);
8163 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8165 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8169 /* MLA: All arguments must be registers. We filter out
8170 multiplication by a power of two, so that we fall down into
8172 if (GET_CODE (XEXP (x
, 0)) == MULT
8173 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8175 /* The cost comes from the cost of the multiply. */
8179 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8181 if (TARGET_HARD_FLOAT
8183 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8185 *total
= COSTS_N_INSNS (1);
8186 if (CONST_DOUBLE_P (XEXP (x
, 1))
8187 && arm_const_double_rtx (XEXP (x
, 1)))
8189 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8196 *total
= COSTS_N_INSNS (20);
8200 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8201 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8203 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8204 if (REG_P (XEXP (XEXP (x
, 0), 0))
8205 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8206 *total
+= COSTS_N_INSNS (1);
8212 case AND
: case XOR
: case IOR
:
8214 /* Normally the frame registers will be spilt into reg+const during
8215 reload, so it is a bad idea to combine them with other instructions,
8216 since then they might not be moved outside of loops. As a compromise
8217 we allow integration with ops that have a constant as their second
8219 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8220 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8221 && !CONST_INT_P (XEXP (x
, 1)))
8222 *total
= COSTS_N_INSNS (1);
8226 *total
+= COSTS_N_INSNS (2);
8227 if (CONST_INT_P (XEXP (x
, 1))
8228 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8230 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8237 *total
+= COSTS_N_INSNS (1);
8238 if (CONST_INT_P (XEXP (x
, 1))
8239 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8241 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8244 subcode
= GET_CODE (XEXP (x
, 0));
8245 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8246 || subcode
== LSHIFTRT
8247 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8249 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8250 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8255 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8257 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8258 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8262 if (subcode
== UMIN
|| subcode
== UMAX
8263 || subcode
== SMIN
|| subcode
== SMAX
)
8265 *total
= COSTS_N_INSNS (3);
8272 /* This should have been handled by the CPU specific routines. */
8276 if (arm_arch3m
&& mode
== SImode
8277 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8278 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8279 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8280 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8281 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8282 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8284 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8287 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
	{
	  subcode = GET_CODE (XEXP (x, 0));
	  if (subcode == ASHIFT || subcode == ASHIFTRT
	      || subcode == LSHIFTRT
	      || subcode == ROTATE || subcode == ROTATERT
	      || (subcode == MULT
		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
	    {
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	      /* Register shifts cost an extra cycle.  */
	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
							subcode, 1, speed);
	      return true;
	    }
	}

      return false;
    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
	    && REG_P (XEXP (operand, 0))
	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
	*total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
		 + rtx_cost (XEXP (x, 2), code, 2, speed));
      return true;
    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case GE:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case LT:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
	 performed, then they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	return true;

      /* Fall through */
    case COMPARE:
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  *total = 0;
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      return false;
    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 1), code, 1, speed);
      return true;
    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
	*total += COSTS_N_INSNS (3);
      return false;
    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  rtx op = XEXP (x, 0);
	  enum machine_mode opmode = GET_MODE (op);

	  if (mode == DImode)
	    *total += COSTS_N_INSNS (1);

	  if (opmode != SImode)
	    {
	      if (MEM_P (op))
		{
		  /* If !arm_arch4, we use one of the extendhisi2_mem
		     or movhi_bytes patterns for HImode.  For a QImode
		     sign extension, we first zero-extend from memory
		     and then perform a shift sequence.  */
		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
		    *total += COSTS_N_INSNS (2);
		}
	      else if (arm_arch6)
		*total += COSTS_N_INSNS (1);

	      /* We don't have the necessary insn, so we need to perform some
		 other operation.  */
	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
		/* An and with constant 255.  */
		*total += COSTS_N_INSNS (1);
	      else
		/* A shift sequence.  Increase costs slightly to avoid
		   combining two shifts into an extend operation.  */
		*total += COSTS_N_INSNS (2) + 1;
	    }

	  return false;
	}
      switch (GET_MODE (XEXP (x, 0)))
	{
	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V4QImode:
	case V2HImode:
	  *total = COSTS_N_INSNS (1);
	  return false;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;
    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
	  || const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
						  INTVAL (x), NULL_RTX,
						  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;
    case SET:
      /* The vec_extract patterns accept memory operands that require an
	 address reload.  Account for the cost of that reload to give the
	 auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
	{
	  *total = rtx_cost (SET_DEST (x), code, 0, speed);
	  if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
	{
	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
	  *total = rtx_cost (mem, code, 0, speed);
	  if (!neon_vector_mem_operand (mem, 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}
      return false;
    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
	 be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
	*total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);
    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);
    case SET:
      {
	/* A SET doesn't have a mode, so let's look at the SET_DEST to get
	   the mode.  */
	int words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
	return (COSTS_N_INSNS (words)
		+ 4 * ((MEM_P (SET_SRC (x)))
		       + MEM_P (SET_DEST (x))));
      }
    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);
    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		    int *total)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*total = COSTS_N_INSNS (2);
      else
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;
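
      /* For instance (illustrative, not from the original source): an
	 SImode load through a plain register address counts as one insn;
	 a DImode access through a more complex address counts as
	 ARM_NUM_REGS (DImode) == 2 insns; and the PIC reg+index form is
	 costed as the two insns it will later be split into.  */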
    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      else if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
	  /* Slightly disparage register shifts, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      if (mode == SImode)
	{
	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
	      || subcode1 == ROTATE || subcode1 == ROTATERT
	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
	      || subcode1 == ASHIFTRT)
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }

	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
	  *total += rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}
      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));

	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
	      || subcode == LSHIFTRT || subcode == ASHIFTRT
	      || (code == AND && subcode == NOT))
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;
    case IF_THEN_ELSE:
      *total = 0;
      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;
    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
				? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	{
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	}
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer_code == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    default:
      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
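
/* A minimal usage sketch for shifter_op_p (illustrative only; the real
   call sites appear in the cost functions below):

     rtx shift_reg = NULL;
     rtx shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

   On return, SHIFT_OP is the value being shifted (or NULL if operand 0
   is not a shifter operand), and SHIFT_REG is non-NULL only when the
   shift amount lives in a register rather than in a constant.  */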
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p,
		 int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  gcc_assert (GET_CODE (x) == UNSPEC);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
      break;

    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return false;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
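
/* For example: with the definition above, LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed (two argument insns plus
   18 for the call and result handling) and COSTS_N_INSNS (4) at -Os.  */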
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
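
/* A minimal sketch of how HANDLE_NARROW_SHIFT_ARITH is meant to be used
   (the real call sites are in the MINUS and PLUS cases below; `x',
   `cost', `speed_p', `shift_op', `shift_reg' and `extra_cost' are names
   the macro assumes to be in scope):

     rtx shift_op, shift_reg;
     shift_reg = NULL;
     HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

   If operand 0 of X is a left-shift-like shifter operand, the expansion
   adds its cost and executes `return true' in the enclosing function.  */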
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		   const struct cpu_cost_table *extra_cost,
		   int *cost, bool speed_p)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }
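
  /* Illustrative note (not in the original source): when a case below
     returns false after recording only the bare insn cost, the generic
     rtx_cost machinery recurses through the operands of X and adds
     their costs on top; returning true means the operand costs have
     already been folded into *cost here.  */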
  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  ? 2 : 1));
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;
	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;
    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				   + ldm_regs_per_insn_subsequent - 1)
				  / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */
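
      /* Worked example (the tuning numbers here are invented for
	 illustration, not taken from any real cost table): with
	 ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
	 an LDM of five registers costs
	 ldm_1st + COSTS_N_INSNS ((5 - 2 + 2 - 1) / 2)
	 == ldm_1st + COSTS_N_INSNS (2).  */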
      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      *cost = COSTS_N_INSNS (1);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
			       ? extra_cost->ldst.ldm_regs_per_insn_1st
			       : extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			       : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	    }
	}
      return true;
    }
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost = COSTS_N_INSNS (speed_p
			       ? extra_cost->fp[mode != SFmode].div : 1);
      else if (mode == SImode && TARGET_IDIV)
	*cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
      else
	*cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case MOD:
    case UMOD:
      *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (2)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (3)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost = (COSTS_N_INSNS (1)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost = (COSTS_N_INSNS (1)
		       + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		}
	      else
		{
		  *cost = COSTS_N_INSNS (2);
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (sub_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  *cost = COSTS_N_INSNS (1);

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, code, 0, speed_p)
			+ rtx_cost (non_shift_op, code, 0, speed_p));
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }

	  return false;
	}
      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
	      else
		*cost += rtx_cost (op1, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0),
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (add_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}
      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}
      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      *cost = COSTS_N_INSNS (1);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }
	  return false;
	}
      if (mode == DImode
	  && GET_CODE (XEXP (x, 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend_add;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
				ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case AND: case XOR: case IOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}
      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost = COSTS_N_INSNS (2);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  *cost = COSTS_N_INSNS (1);

	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}
      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode
	  && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (x, 1), 0),
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}
      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
					speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
					speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  *cost = COSTS_N_INSNS (1);
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost = COSTS_N_INSNS (4);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  enum machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost = COSTS_N_INSNS (2);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  *cost = COSTS_N_INSNS (1);
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
		  return true;
		}

	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
    case EQ:
    case NE:
    case LT:
    case LE:
    case GT:
    case GE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost = COSTS_N_INSNS (2);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}
      else  /* GET_MODE (XEXP (x, 0)) == SImode.  */
	*cost = COSTS_N_INSNS (1);

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      {
	/* CONST_INT has no mode, so we cannot tell for sure how many
	   insns are really going to be needed.  The best we can do is
	   look at the value passed.  If it fits in SImode, then assume
	   that's the mode it will be used for.  Otherwise assume it
	   will be used in DImode.  */
	if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	  mode = SImode;
	else
	  mode = DImode;

	/* Avoid blowing up in arm_gen_constant ().  */
	if (!(outer_code == PLUS
	      || outer_code == AND
	      || outer_code == IOR
	      || outer_code == XOR
	      || outer_code == MINUS))
	  outer_code = SET;

      const_int_cost:
	if (mode == SImode)
	  {
	    *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x), NULL, NULL,
						      0, 0));
	    /* Extra costs?  */
	  }
	else
	  {
	    *cost += COSTS_N_INSNS (arm_gen_constant
				    (outer_code, SImode, NULL,
				     trunc_int_for_mode (INTVAL (x), SImode),
				     NULL, NULL, 0, 0)
				    + arm_gen_constant (outer_code, SImode,
							NULL,
							INTVAL (x) >> 32, NULL,
							NULL, 0, 0));
	    /* Extra costs?  */
	  }
      }
      return true;
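
      /* Illustrative (not from the original source): a value such as
	 0x101 is neither a valid immediate nor the complement of one, so
	 its cost comes from arm_gen_constant, which would typically
	 report a two-insn synthesis; a value that const_ok_for_arm
	 accepts contributes a single insn.  The counts are computed, not
	 hard-coded here.  */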
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost = COSTS_N_INSNS (2);
	  else
	    *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
	}
      else
	*cost = COSTS_N_INSNS (2);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;
10465 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10466 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10468 if (vfp3_const_double_rtx (x
))
10470 *cost
= COSTS_N_INSNS (1);
10472 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10478 *cost
= COSTS_N_INSNS (1);
10479 if (mode
== DFmode
)
10480 *cost
+= extra_cost
->ldst
.loadd
;
10482 *cost
+= extra_cost
->ldst
.loadf
;
10485 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10489 *cost
= COSTS_N_INSNS (4);
10495 && TARGET_HARD_FLOAT
10496 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10497 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10498 *cost
= COSTS_N_INSNS (1);
10500 *cost
= COSTS_N_INSNS (4);
    case HIGH:
    case LO_SUM:
      *cost = COSTS_N_INSNS (1);
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;
    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost = COSTS_N_INSNS (2);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
			      speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
				0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost = COSTS_N_INSNS (2);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_FPU_ARMV8
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[GET_MODE (XEXP (x, 0))
					== DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_FPU_ARMV8)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
10674 /* Just a guess. Guess number of instructions in the asm
10675 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10676 though (see PR60663). */
10677 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10678 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10680 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10684 if (mode
!= VOIDmode
)
10685 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10687 *cost
= COSTS_N_INSNS (4); /* Who knows? */
#undef HANDLE_NARROW_SHIFT_ARITH

/* Dispatch RTX cost calculation to the appropriate implementation:
   either the deprecated per-core callbacks, or the new table-driven
   code in arm_new_rtx_costs.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
	       int *total, bool speed)
{
  bool result;

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
    {
      /* Old way.  (Deprecated.)  */
      if (!speed)
	result = arm_size_rtx_costs (x, (enum rtx_code) code,
				     (enum rtx_code) outer_code, total);
      else
	result = current_tune->rtx_costs (x, (enum rtx_code) code,
					  (enum rtx_code) outer_code, total,
					  speed);
    }
  else
    {
      /* New way.  */
      if (current_tune->insn_extra_cost)
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    current_tune->insn_extra_cost,
				    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
	 && current_tune->insn_extra_cost == NULL  */
      else
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    &generic_extra_costs, total, speed);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */
static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
	  || mode == DImode)
	{
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 2;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	    {
	      i >>= booth_unit_size;
	      cost++;
	    }

	  *total = COSTS_N_INSNS (cost);
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
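
/* Worked example for the Booth-style loop above (illustrative only):
   for a multiplier of 0x55, which const_ok_for_arm accepts (base cost
   4), the loop consumes two bits per iteration and runs four times
   before the value reaches zero, giving COSTS_N_INSNS (8) plus the cost
   of operand 0.  */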
/* RTX cost for cores with a fast multiply unit (M variants).  */
static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (5);
	  return false;
	}

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 8;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	    {
	      i >>= booth_unit_size;
	      cost++;
	    }

	  *total = COSTS_N_INSNS (cost);
	  return false;
	}

      if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (4);
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	}

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		      int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
	return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
	 will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return false;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (5);
	  return false;
	}

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* If operand 1 is a constant we can more accurately
	     calculate the cost of the multiply.  The multiplier can
	     retire 15 bits on the first cycle and a further 12 on the
	     second.  We do, of course, have to load the constant into
	     a register first.  */
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
	  /* There's a general overhead of one cycle.  */
	  int cost = 1;
	  unsigned HOST_WIDE_INT masked_const;

	  if (i & 0x80000000)
	    i = ~i;

	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

	  masked_const = i & 0xffff8000;
	  if (masked_const != 0)
	    {
	      cost++;
	      masked_const = i & 0xf8000000;
	      if (masked_const != 0)
		cost++;
	    }
	  *total = COSTS_N_INSNS (cost);
	  return false;
	}

      if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (3);
	  return false;
	}

      /* Requires a lib call.  */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
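/* For illustration, a hypothetical standalone helper (not used by the port)
   mirroring the constant-multiply costing above: one cycle of general
   overhead, one more if any bits above bit 14 survive (the first cycle
   retires 15 bits), and a final one if bits above bit 26 survive (the second
   retires a further 12).  Negative constants are costed via their bitwise
   complement.  E.g. 0x7fff costs 1 cycle, 0x10000 costs 2.  */

static int
xscale_mult_cost_estimate (unsigned long value)
{
  unsigned long i = value;
  int cost = 1;			/* General overhead of one cycle.  */

  if (i & 0x80000000)
    i = ~i;			/* Cost negative multipliers via ~i.  */

  i &= 0xffffffffUL;

  if ((i & 0xffff8000) != 0)	/* Bits 15..31 still significant.  */
    {
      cost++;
      if ((i & 0xf8000000) != 0)	/* Bits 27..31 still significant.  */
	cost++;
    }

  return cost;
}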
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
	{
	case MULT:
	  *total = COSTS_N_INSNS (3);
	  return true;

	default:
	  *total = thumb1_rtx_costs (x, code, outer_code);
	  return true;
	}
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (5);
	  return false;
	}

      if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase the
	     cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints ();
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && REG_P (SET_DEST (PATTERN (dep)))
		      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
						  SET_DEST (PATTERN (dep))))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 0;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   such a move is typically more expensive than a single memory access.
   We set the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
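/* As a worked instance of the vec_construct case above (a sketch, not
   compiler code): building a V4SI vector element-by-element has
   TYPE_VECTOR_SUBPARTS = 4, so the reported cost is 4 / 2 + 1 = 3, while a
   16-element V16QI construction costs 16 / 2 + 1 = 9.  */

static int
vec_construct_cost_example (int subparts)
{
  /* Mirrors "elements / 2 + 1" from arm_builtin_vectorization_cost.  */
  return subparts / 2 + 1;
}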
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
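/* A minimal sketch of the weighting applied above (illustrative only): a
   statement in an inner loop of the loop being vectorized is scaled by 50
   before being accumulated, so an inner-loop statement with cost 1
   contributes 50 to the vect_body bucket where an outer one contributes 1.  */

static unsigned
add_stmt_cost_example (int count, int stmt_cost, int in_inner_loop)
{
  if (in_inner_loop)
    count *= 50;	/* Same arbitrary factor as arm_add_stmt_cost.  */

  return (unsigned) (count * stmt_cost);
}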
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_REG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
			int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i + 1];
    }

  ready[i] = first_older_only_insn;
  return;
}
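/* The move above is a simple rotation, shown here in isolation (a sketch
   over a plain int array rather than the rtx ready list): the saved element
   takes the slot of first_younger while everything between slides down one
   slot, preserving the relative order of the rest.  */

static void
rotate_ready_example (int *ready, int first_older_only, int first_younger)
{
  int i;
  int saved = ready[first_older_only];

  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];	/* Slide the intermediate entries down.  */

  ready[i] = saved;		/* Drop the older-only entry in place.  */
}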
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  HOST_WIDE_INT m1, m2;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);
  mantissa = m1;
  mant_hi = m2;

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
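/* To see the encoding at work (an illustrative helper, not used by the
   port): 1.0 is 16 * 2^-4, i.e. s = 0, n = 16, r = 4, so the index is
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, the imm8 encoding that
   fconst uses for 1.0.  */

static int
vfp3_encode_quarter_precision (int sign, int n, int r)
{
  /* Caller must guarantee 16 <= n <= 31 and 0 <= r <= 7, as required by
     the formula in the comment above.  */
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}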
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
static int
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
	mode = DImode;
      innersize = GET_MODE_SIZE (mode);
    }

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
	{
	  rtx elt = CONST_VECTOR_ELT (op, i);
	  REAL_VALUE_TYPE re;

	  REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

	  if (!REAL_VALUES_EQUAL (r0, re))
	    return -1;
	}

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (CONST_DOUBLE_P (el))
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (CONST_DOUBLE_P (el))
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
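/* For illustration, a stand-alone check (a hypothetical helper, not used by
   neon_valid_immediate itself) of the variant-0 pattern from the table
   above: every 32-bit element must have the form 00000000 00000000 00000000
   abcdefgh, i.e. in the byte-splatted little-endian array each group of four
   bytes is {abcdefgh, 0, 0, 0}.  E.g. a V4SI of 0x000000ab matches.  */

static int
neon_variant0_p (const unsigned char *bytes, unsigned int idx)
{
  unsigned int i;

  for (i = 0; i < idx; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
	  && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      return 0;

  return 1;
}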
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shift,
   because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
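/* The asymmetry between the two bounds above can be seen with 32-bit
   elements (a sketch mirroring the range checks, not used by the port):
   vshl accepts immediates 0..31 while vshr accepts 1..32.  */

static int
neon_shift_imm_in_range (unsigned long shift, unsigned int elem_bits,
			 int isleftshift)
{
  if (isleftshift)
    return shift < elem_bits;			/* 0 to <size>-1.  */
  else
    return shift != 0 && shift <= elem_bits;	/* 1 to <size>.  */
}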
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     enum machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  enum machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (enum machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) <  1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return !strict;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* FIXME: vld1 allows register post-modify.  */

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */

static int
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == UNSPEC
      && (XINT (*x, 1) == UNSPEC_PIC_BASE
	  || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
    return 1;
  return 0;
}

static bool
arm_cannot_copy_insn_p (rtx insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
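/* Worked example for the matcher above (illustrative, not compiler code):
   the C idiom x < 0 ? 0 : x > 255 ? 255 : x has lo_bound = 0 and
   hi_bound = 255, so log = exact_log2 (256) = 8 and it maps to usat with
   mask 8; clamping to [-256, 255] instead satisfies lo == -hi - 1 and maps
   to ssat with mask 9.  */

static int
sat_bounds_example (long lo, long hi, int *mask, int *is_signed)
{
  int log = 0;
  long n = hi + 1;

  /* The high bound must be 2^log - 1 for some log.  */
  if (n <= 0 || (n & (n - 1)) != 0)
    return 0;
  while ((1L << log) < n)
    log++;

  if (lo == 0)
    {
      *mask = log;		/* usat case.  */
      *is_signed = 0;
      return 1;
    }
  if (lo == -hi - 1)
    {
      *mask = log + 1;		/* ssat case.  */
      *is_signed = 1;
      return 1;
    }
  return 0;
}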
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
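/* A concrete instance of what the predicate above accepts (illustrative
   only): ldr r0, [r2, #8] and ldr r1, [r2, #12] share base register r2 and
   have val_diff = 4, so they count as adjacent and may be combined via the
   arith_adjacentmem pattern mentioned in the comment above.  The sketch
   checks just the offset relationship; the base-register and offset-range
   checks are omitted.  */

static int
offsets_adjacent_p (long val0, long val1)
{
  long val_diff = val1 - val0;

  return val_diff == 4 || val_diff == -4;
}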
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1-3
	  2		 2-6
	  3		 3-9
	  4		 4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
                      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset that is higher than the
               previous one by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
        return false;
    }
  return true;
}
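/* Worked example (illustrative): with unsorted_offsets = {8, 0, 4} the
   caller sets order[0] = 1 (the lowest offset, 0).  The first pass finds
   0 + 4 at index 2, the second finds 4 + 4 at index 0, giving
   order = {1, 2, 0}.  Offsets such as {0, 4, 12} fail because no entry
   equals 4 + 4, so order[i] stays equal to order[i - 1].  */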
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
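/* Illustrative mapping from the sorted offsets to the chosen LDM variant
   (a sketch):
     lowest offset == 0                -> ldmia  (case 1)
     lowest offset == 4,  ARM only     -> ldmib  (case 2)
     highest offset == 0, ARM only     -> ldmda  (case 3)
     highest offset == -4, 32-bit only -> ldmdb  (case 4)
   Any other base offset falls into case 5 and needs a preparatory add,
   accepted only when the offset is a valid ARM immediate.  */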
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;
  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
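/* Illustrative consequence of the base_writeback test above (a sketch):
   on Thumb-1 the eventual instruction updates the base, so a pair such as
       str r5, [r5]
       str r6, [r5, #4]
   is rejected -- "stmia r5!, {r5, r6}" would store an unpredictable value
   for r5.  Register numbers here are examples only.  */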
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (VOIDmode, basereg,
                       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
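/* Sketch of the RTL produced above for two registers with a writeback
   offset of 8 (illustrative; the mem addresses come from MEMS):
     (parallel
       [(set (reg rb) (plus (reg rb) (const_int 8)))
        (set (reg r0) (mem (reg rb)))
        (set (reg r1) (mem (plus (reg rb) (const_int 4))))])
   The writeback SET, when present, always occupies element 0.  */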
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (VOIDmode, basereg,
                       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register numbers.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
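/* Usage sketch (illustrative, mirroring the block-move code further down):
   copy four words while post-incrementing both addresses.

     int regnos[4] = {0, 1, 2, 3};
     HOST_WIDE_INT srcoff = 0, dstoff = 0;
     emit_insn (arm_gen_load_multiple (regnos, 4, src, TRUE,
                                       srcbase, &srcoff));
     emit_insn (arm_gen_store_multiple (regnos, 4, dst, TRUE,
                                        dstbase, &dstoff));

   SRC/DST hold the addresses and SRCBASE/DSTBASE are the original MEMs;
   the offsets advance because write_back is TRUE.  */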
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */
bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);
  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          {
            int t = regs[i];
            regs[i] = regs[j];
            regs[j] = t;
          }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        {
          base_reg = regs[0];
          base_reg_rtx = newbase;
        }
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
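/* Illustrative transformation (register numbers are examples only):
       ldr r2, [r4, #4]
       ldr r3, [r4, #8]
   becomes, in the ldm_case == 5 path above when r4 dies,
       add r2, r4, #4
       ldmia r2, {r2, r3}
   with the first loaded register reused as the new base on 32-bit
   targets.  */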
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);
  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
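/* Illustrative transformation (a sketch):
       str r1, [r4]
       str r2, [r4, #4]
   becomes "stmia r4, {r1, r2}".  Unlike the load case, the source
   registers are never reordered here, so store_multiple_sequence is
   called with check_regs == true.  */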
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */
bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);
  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
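/* Sketch of the renaming performed above (illustrative): if the constants
   were loaded into {r3, r1, r3} for stores at offsets {0, 4, 8}, the
   duplicated r3 is first replaced with a free scratch register, and
   reg_order then maps the registers to an ascending sequence so that a
   single "stmia rb, {...}" stores each constant into the slot chosen by
   sorted_regs.  Register numbers here are examples only.  */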
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
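/* Worked example (illustrative): length == 23 with interleave_factor == 2
   copies two 8-byte blocks in the main loop, then one leftover word, one
   halfword and one byte, matching the assertions on REMAINING above.  */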
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx label, src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
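/* Worked example (illustrative): length == 37 with bytes_per_iter == 16
   runs the loop twice (32 bytes) and leaves leftover == 5 bytes for the
   straight-line copy after the loop.  */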
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */
static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         redundant.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, 0));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);     /* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
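/* Worked example (illustrative): an 8-byte copy becomes a single
   ldmia/stmia pair; a 10-byte copy additionally loads the trailing word
   into part_bytes_reg and stores its low halfword, and on a big-endian
   target the wanted bytes are first shifted down from the top of the
   word.  */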
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */
static rtx
next_consecutive_mem (rtx mem)
{
  enum machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
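/* Usage sketch (illustrative): stepping a MEM through consecutive SImode
   words; NWORDS is a hypothetical count.

     rtx m = adjust_address (srcbase, SImode, 0);
     for (int k = 0; k < nwords; k++)
       {
         emit_move_insn (gen_reg_rtx (SImode), m);
         m = next_consecutive_mem (m);
       }

   Each step advances both the address and the recorded MEM_OFFSET by
   GET_MODE_SIZE of the MEM's mode.  */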
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  if (len)
    {
      dst = adjust_address (dst, QImode, 0);
      src = adjust_address (src, QImode, 0);
      reg0 = gen_reg_rtx (QImode);
      emit_move_insn (reg0, src);
      emit_move_insn (dst, reg0);
    }

  return true;
}
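/* Worked example (illustrative): len == 14 with both operands aligned
   copies one DImode chunk (8 bytes), then one SImode word, then one
   halfword through the tails above.  */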
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT:
          return CC_DLTmode;
        case LE:
          return CC_DLEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT:
          return CC_DGTmode;
        case GE:
          return CC_DGEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
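/* Example (illustrative): with cond1 == EQ and cond2 == LE, EQ dominates
   LE (whenever EQ holds, LE holds), so the pair shares a dominance CC
   mode; an incompatible pair such as GT with LT yields plain CCmode and
   the calling pattern fails to match.  */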
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_32BIT)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  enum machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
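/* Sketch of the equality trick above (illustrative): to compare two
   DImode values for equality when CC_CZmode is not used, the emitted
   sequence is roughly
       t = x ^ y;        (expand_binop with xor_optab)
       compare t, #0     (only the Z flag is needed)
   so no carry-propagating double-word compare is required.  */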
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
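/* Worked example (illustrative): offset == 0x1001 splits into lo == 0x001
   and hi == 0x1000; hi is added to the base with addsi3 and lo remains as
   the immediate of the two byte loads, keeping each within the
   -4095..4095 range noted above.  */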
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            {
              rtx tmp = scratch;
              scratch = base_plus;
              base_plus = tmp;
            }
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                {
                  rtx tmp = scratch;
                  scratch = base_plus;
                  base_plus = tmp;
                }
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     outval.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (enum machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
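
/* Illustrative note (not from the original source): on a big-endian
   AAPCS target the code above returns true (pad upward) for a 3-byte
   struct held in a register, while anything that falls through to the
   default returns !BYTES_BIG_ENDIAN, i.e. false, padding downward.  */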

/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
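
/* Illustrative examples (not from the original source), given the
   limits selected above (1020 for Thumb-2, 255 for ARM):

     offset_ok_for_ldrd_strd (252)   -> true in both modes
     offset_ok_for_ldrd_strd (-1020) -> true for Thumb-2 only
     offset_ok_for_ldrd_strd (6)     -> false for Thumb-2 (not a
				       multiple of 4), true for ARM.  */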

/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the offsets is 4.
   If preload complete then check that registers are legal.  WBACK indicates whether
   address is updated.  LOAD indicates whether memory access is load or store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}

/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
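
/* Illustrative note (not from the original source): the helper above
   accepts a plain base register, or a base plus/minus constant, e.g.

     (mem (reg r4))			  -> *base = r4, *offset = 0
     (mem (plus (reg r4) (const_int 8)))  -> *base = r4, *offset = 8

   and rejects autoincrement forms such as (mem (pre_inc ...)).  */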
#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)

/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	str r1, [r2]
	mov r0, 1
	str r0, [r2, #4]
     in Thumb mode assuming that r1 is free.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      SWAP_RTX (operands[0], operands[1]);
      SWAP_RTX (operands[2], operands[3]);
      if (const_store)
	SWAP_RTX (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      SWAP_RTX (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 1
	   mov r0, 0
	   str r0, [r2]
	   str r1, [r2, #4]
	 */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  SWAP_RTX (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
#undef SWAP_RTX
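
/* Illustrative sketch (not from the original source) of the
   transformation the peephole helper above enables: two word loads
   from consecutive addresses off the same base,

	ldr	r0, [r2]
	ldr	r1, [r2, #4]

   can be replaced by a single

	ldrd	r0, r1, [r2]

   provided operands_ok_ldrd_strd accepts the register assignment.  */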

/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}

/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
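
/* For example (illustrative, not from the original source):
   MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode) both
   evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */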

static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx	minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data does into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}

/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
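
/* Worked example (illustrative, not from the original source): if
   LABEL is aligned to 8 bytes on a Thumb target, align is 8 and
   min_insn_size is 2, so up to 6 bytes of padding may precede the
   label; on an ARM target the same label yields at most 4 bytes.  */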

/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */

static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

/* Move a minipool fix MP from its current location to before MIN_MP.
   If MIN_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}

/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}

/* Fill in the offsets for minipool entries.  */
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}

/* Output the literal table */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (mp->fix_size)
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}

/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}

/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}

/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}

/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}

/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
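
/* Illustrative example (not from the original source): for the 64-bit
   constant 0x000000ff00000001, the high part 0xff and the low part 0x1
   are both valid ARM immediates, so the check above succeeds and the
   constant can be built as two 32-bit moves instead of a pool load.  */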

/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}

/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Lets just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}

/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx pat, op0, set = NULL;
      rtx prev, insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
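
/* Illustrative sketch (not from the original source) of the rewrite
   performed above: a plain move whose result is then compared against
   zero,

	mov	r1, r0
	cmp	r1, #0
	beq	.L2

   is rewritten as a flag-setting subtract of zero,

	subs	r1, r0, #0
	beq	.L2

   so the explicit compare can be dropped from the branch sequence.  */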

/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      enum {SKIP, CONV, SWAP_CONV} action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (!OBJECT_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* else fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = CONV;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case NOT:
		    case NEG:
		      /* MVNS <Rd>,<Rm>  */
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = CONV;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (VOIDmode, dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
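
/* Illustrative example (not from the original source): in a region
   where the flags are dead, the pass above converts

	add	r0, r0, r1	@ 32-bit encoding, flags preserved

   into

	adds	r0, r0, r1	@ 16-bit encoding, flags clobbered

   by wrapping the SET in a PARALLEL with a clobber of CC_REGNUM.  */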

/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix *  last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}

/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
  return "0";
}

/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}

/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
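
/* Illustrative example (not from the original source): for an
   unconditional pop of r4, r5 and pc off the stack pointer with
   writeback, under unified syntax the code above assembles the
   pattern "pop\t{r4, r5, pc}".  */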

/* Output the assembly for a store multiple.  */

const char *
vfp_output_fstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
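
/* Illustrative example (not from the original source): pushing three
   double registers starting at d8 produces an insn of the form
   "fstmfdd\tsp!, {d8, d9, d10}".  */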

/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}

/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }
}

/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}

/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}

/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}

/* Emit the pair of set insns that loads SRC into DEST, splitting an
   immediate into its low and high 16-bit halves.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
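
/* Illustrative sketch (not from the original source): for a 32-bit
   immediate such as 0x12345678, the two sets emitted above correspond
   to a movw/movt pair:

	movw	r0, #0x5678
	movt	r0, #0x1234  */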
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];

  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }
  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD
                  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
                output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
              else
                output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
          break;
        case PRE_MODIFY:
        case POST_MODIFY:
          /* Auto-increment addressing modes should never have overlapping
             base and destination registers, and overlapping index registers
             are already prohibited, so this doesn't need to worry about
             fix_cm3_ldrd.  */
          otherops[0] = operands[0];
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
            {
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
                {
                  /* Registers overlap so split out the increment.  */
                  if (emit)
                    {
                      output_asm_insn ("add%?\t%1, %1, %2", otherops);
                      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  /* Use a single insn if we can.
                     FIXME: IWMMXT allows offsets larger than ldrd can
                     handle, fix these up with a pair of ldr.  */
                  if (TARGET_THUMB2
                      || !CONST_INT_P (otherops[2])
                      || (INTVAL (otherops[2]) > -256
                          && INTVAL (otherops[2]) < 256))
                    {
                      if (emit)
                        output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
                    }
                  else
                    {
                      if (emit)
                        {
                          output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
                          output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                        }
                      if (count)
                        *count = 2;
                    }
                }
            }
          else
            {
              /* Use a single insn if we can.
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
                 fix these up with a pair of ldr.  */
              if (TARGET_THUMB2
                  || !CONST_INT_P (otherops[2])
                  || (INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256))
                {
                  if (emit)
                    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          break;
        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditional execution.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;
          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (REG_P (otherops[2])
                          || TARGET_THUMB2
                          || (CONST_INT_P (otherops[2])
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0],
                                                   otherops[2]))
                        {
                          rtx tmp;
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          tmp = otherops[1];
                          otherops[1] = otherops[2];
                          otherops[2] = tmp;
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldr%(d%)\t%0, [%1, %2]",
                                             otherops);
                        }
                      return "";
                    }

                  if (CONST_INT_P (otherops[2]))
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else
                    {
                      if (emit)
                        output_asm_insn ("add%?\t%0, %1, %2", otherops);
                    }
                }
              else
                {
                  if (emit)
                    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
                }

              if (count)
                *count = 2;

              if (TARGET_LDRD)
                return "ldr%(d%)\t%0, [%1]";

              return "ldm%(ia%)\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
                  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than strd can handle,
             fix these up with a pair of str.  */
          if (!TARGET_THUMB2
              && CONST_INT_P (otherops[2])
              && (INTVAL (otherops[2]) <= -256
                  || INTVAL (otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
                  return "";
                }
            }
          if (TARGET_LDRD
              && (REG_P (otherops[2])
                  || TARGET_THUMB2
                  || (CONST_INT_P (otherops[2])
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
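/* Illustration (assumed output, not text from the original sources):
   a DImode load from a plain register address emits "ldrd %0, [%m1]"
   on an LDRD-capable core and "ldmia %m1, %M0" otherwise; the
   overlapping base/data fallback in the default case instead orders
   the two single-word loads so the base register is clobbered
   last.  */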
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */
      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
              || mode == DFmode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "f%s%c%%?\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? 'd' : 's',
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
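/* Example of the template expansion above, for illustration only: a
   DFmode load with a POST_INC address selects the second template,
   and sprintf fills it in as

       "fldmiad%?\t%0!, {%P1}"

   i.e. "f" + "ld" (load) + "mia" + 'd' (double precision), with
   writeback of the base register and no "%@ int" comment suffix.  */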
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.  GCC RTL defines
   element ordering based on in-memory order.  This can be different
   from the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   it is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        {
          templ = "v%s1.64\t%%h1, %%A0";
          ops[0] = mem;
        }
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      {
        int i;
        int overlap = -1;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient
               size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        templ = "v%smia%%?\t%%m0, %%h1";
      else
        templ = "v%s1.64\t%%h1, %%A0";

      ops[0] = mem;
      ops[1] = reg;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
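/* For illustration (assumed behaviour, not text from the original
   sources): a quad-word access (nregs == 2) whose address is a plain
   register falls into the default case and is printed with the
   vld1.64/vst1.64 template, while a structure mode covering more
   than four D-registers (e.g. XImode) takes the vldmia/vstmia
   template instead, because vld1.64 cannot name that many
   registers.  */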
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx insn)
{
  rtx reg, mem, addr;
  int load;
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case EImode:
        case OImode:
          return 8;
        case CImode:
          return 12;
        case XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
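/* Worked example, for illustration only: output_add_immediate with
   n == 0x10004 cannot use one instruction, since 0x10004 is not a
   rotated 8-bit immediate; the loop above splits it into the two
   valid immediates 0x4 (found at i == 2) and 0x10000 (at i == 16),
   giving

       add     %0, %1, #4
       add     %0, %0, #65536      */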
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
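/* Illustrative examples (not from the original sources): shift_op
   maps (ashift x (const_int 3)) and (mult x (const_int 8)) to the
   same pair, ARM_LSL_NAME with amount 3 (via int_log2 (8) == 3),
   while an out-of-range (ashift x (const_int 34)) is rewritten as
   "lsr" with amount 32, which yields the arithmetically correct zero
   result as the comment above explains.  */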
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
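/* For illustration only: given the three bytes 'a', '"' and '\n',
   the loop above emits

       .ascii  "a\"\012"

   printable characters pass straight through, the quote gets a
   backslash escape, and the newline becomes a three-digit octal
   escape counted as four characters of output.  */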
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7, Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 -r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1)
                  || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
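/* Worked example, for illustration only: if d8-d11 are the only
   call-saved VFP registers live, the loop above accumulates one run
   with count == 4 and the function returns 32 bytes.  On a
   pre-arm_arch6 core a run of exactly two registers is padded to
   three (24 bytes) by the ARM10 VFPr1 workaround.  */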
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |= (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5 && TARGET_ARM)
                if (TARGET_UNIFIED_ASM)
                  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
                else
                  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
                }
            }
          else
            if (TARGET_UNIFIED_ASM)
              sprintf (instr, "pop%s\t{", conditional);
            else
              sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          /* Use bx if it's available.  */
          if (arm_arch5 || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
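/* Illustrative output (assumed, not text from the original sources):
   a normal ARM function that saved {r4, r5, lr} and returns directly
   can be finished with the single unified-syntax instruction

       pop     {r4, r5, pc}

   the return address is loaded straight into the PC, so no separate
   return instruction is emitted afterwards.  */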
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  if (TARGET_THUMB1)
    {
      /* ??? Do we want to print some of the below anyway?  */
      return;
    }

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
               crtl->args.size,
               crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (VOIDmode, mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* The first store, which also allocates the stack area.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
            tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
            tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (VOIDmode,
                            gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (VOIDmode,
                            gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
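/* For illustration (an assumed example): with saved_regs_mask
   covering {r4, r5, r6}, num_regs is odd, so r4 is pushed first by a
   single store that also allocates the whole 12-byte area, and the
   {r5, r6} pair then goes out as one doubleword-aligned STRD at
   sp + 4.  */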
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores rather
   than an STM, because single-word stores give the scheduler more freedom
   and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (VOIDmode,
                           gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (VOIDmode,
                                 gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (VOIDmode,
                               gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc;
  int offset_adj;
  int emit_update;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    {
      tmp = ret_rtx;
      XVECEXP (par, 0, 0) = tmp;
    }

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (VOIDmode,
                         stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (VOIDmode,
                           reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
                     base_reg,
                     plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (VOIDmode,
                         reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
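/* A sketch of the net effect (for exposition): restoring d8 and d9 with
   BASE_REG == SP is matched as a single "vldm sp!, {d8-d9}", with the
   first SET in the PARALLEL advancing SP by 16 bytes (two 8-byte
   registers).  */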
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If an
   even number of registers is being popped, multiple LDRD patterns are created
   for all register pairs.  If an odd number of registers is popped, the last
   register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (SImode,
                           reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the registers
               to be loaded are generated in above given LDRD pattern, and the
               pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (SImode, reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}
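/* A worked illustration (hypothetical register set): SAVED_REGS_MASK
   covering {r4, r6, r7} pops roughly as

        ldrd    r4, r6, [sp]
        add     sp, sp, #8
        ldr     r7, [sp], #4

   since Thumb-2 LDRD does not require consecutive target registers, and the
   odd remaining register uses LDR with post-increment.  */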
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
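/* For illustration (hypothetical mask): popping {r4, r5, r6, r7, pc} in ARM
   mode would come out roughly as

        ldrd    r4, r5, [sp]
        ldrd    r6, r7, [sp, #8]
        add     sp, sp, #16
        ldr     pc, [sp], #4

   i.e. offset addressing, one separate stack update, and the PC load
   last.  */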
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped,
               and we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (Pmode,
                         stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (SImode,
                         gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
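/* For example, a function returning a DImode value (in r0-r1) yields 8 here,
   and one returning an SImode value yields 4.  */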
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!leaf_function_p ()
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because there is an indirect
   tailcall happening in this particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     To work around this, we cache the computed frame size.  However,
     we only need to know about leaf functions once reload has completed,
     and the frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT && TARGET_VFP)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            reg = 3;
          else
            for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
              {
                /* Avoid fixed registers; they may be changed at
                   arbitrary times so it's unsafe to restore them
                   during the epilogue.  */
                if (!fixed_regs[i]
                    && (offsets->saved_regs_mask & (1 << i)) == 0)
                  {
                    reg = i;
                    break;
                  }
              }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
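/* A worked example (hypothetical numbers, assuming no static chain and a
   zero CALLER_INTERWORKING_SLOT_SIZE): 16 bytes of pretend args, four core
   registers saved and 8 bytes of locals gives saved_args = 16,
   saved_regs = 32, soft_frame = 32, locals_base = 40 and outgoing_args = 40
   plus any outgoing argument space, rounded to keep SP doubleword
   aligned.  */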
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
static bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}
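/* Sketch of the VFP loop above: if d8-d11 are the only live call-saved
   D-registers, the dead ranges on either side terminate the run, so a
   single vfp_emit_fstmd call covers all four registers (one
   "vpush {d8-d11}"), adding 32 bytes to saved_size.  */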
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which registers we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx r0

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if the IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }
      else if (IS_NESTED (func_type))
        {
          /* The static chain register is the same as the IP register
             used as a scratch register during stack frame creation.
             To get around this need to find somewhere to store IP
             whilst the frame is being created.  We try the following
             places in order:

               1. The last argument register r3 if it is available.
               2. A slot on the stack above the frame if there are no
                  arguments to push onto the stack.
               3. Register r3 again, after pushing the argument registers
                  onto the stack, if this is a varargs function.
               4. The last slot on the stack created for the arguments to
                  push, if this isn't a varargs function.

             Note - we only need to tell the dwarf2 backend about the SP
             adjustment in the second variant; the static chain register
             doesn't need to be unwound, as it doesn't contain a value
             inherited from the caller.  */

          if (!arm_r3_live_at_start_p ())
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
          else if (args_to_push == 0)
            {
              rtx addr, dwarf;

              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
              saved_regs += 4;

              addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
              fp_offset = 4;

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -fp_offset));
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }
          else
            {
              /* Store the args on the stack.  */
              if (cfun->machine->uses_anonymous_args)
                {
                  insn
                    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                           (0xf0 >> (args_to_push / 4)) & 0xf);
                  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
                  saved_pretend_args = 1;
                }
              else
                {
                  rtx addr, dwarf;

                  if (args_to_push == 4)
                    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
                  else
                    addr
                      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                            plus_constant (Pmode,
                                                           stack_pointer_rtx,
                                                           -args_to_push));

                  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

                  /* Just tell the dwarf backend that we adjusted SP.  */
                  dwarf
                    = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
                  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
                }

              RTX_FRAME_RELATED_P (insn) = 1;
              fp_offset = args_to_push;
              args_to_push = 0;
            }
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

          if (IS_NESTED (func_type))
            {
              /* Recover the static chain register.  */
              if (!arm_r3_live_at_start_p () || saved_pretend_args)
                insn = gen_rtx_REG (SImode, 3);
              else
                {
                  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
                  insn = gen_frame_mem (SImode, insn);
                }
              emit_set_insn (ip_rtx, insn);
              /* Add a USE to stop propagate_one_insn() from barfing.  */
              emit_insn (gen_force_register_use (ip_rtx));
            }
        }
      else
        {
          insn = GEN_INT (saved_regs - 4);
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                         hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
        {
          fputc('s', stream);
          arm_print_condition (stream);
        }
      else
        {
          arm_print_condition (stream);
          fputc('s', stream);
        }
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        REAL_VALUE_FROM_CONST_DOUBLE (r, x);
        r = real_value_negate (&r);
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          enum machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        int mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        int mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
        int result;

        gcc_assert (CONST_DOUBLE_P (x));
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          if (TARGET_NEON)
            {
              char fpstr[20];
              real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                               sizeof (fpstr), 0, 1);
              fprintf (stream, "#%s", fpstr);
            }
          else
            fprintf (stream, "#%s", fp_immediate_constant (x));
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
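/* Example of the 'Q'/'R'/'H' codes above (little-endian, DImode value in
   r0-r1): "%Q0" prints r0 (least significant half), "%R0" prints r1 (most
   significant half), and "%H0" prints r1, the higher register number, on
   either endianness.  */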
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (!REG_P (base)
              || (REG_P (index) && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  */
              /* (one of them must be).  */
              /* Also ensure the SP is not used as an index register.  */
              rtx temp = base;
              base = index;
              index = temp;
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              asm_fprintf (stream, "[%r, %s%r",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (XEXP (index, 0)));
              arm_print_operand (stream, index, 'S');
              fputs ("]", stream);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          extern enum machine_mode output_memory_reference_mode;

          gcc_assert (REG_P (XEXP (x, 0)));

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
          else
            asm_fprintf (stream, "[%r], #%s%d",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (REG_P (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (x, 1)))
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          if (!arm_pic_data_is_text_relative
              || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            REAL_VALUE_TYPE rval;

            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

            assemble_real
              (rval, GET_MODE_INNER (mode),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
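/* Sketch of the AAPCS path above: a constructor with priority 65 is placed
   in section ".init_array.00065" as

        .word   fn(target1)

   where "fn" stands for the constructor's symbol.  */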
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes is:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
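/* Illustration: three single-insn COND_EXEC bodies predicated EQ, EQ, NE
   would fold into one block under "itte eq", with arm_condexec_masklen == 3
   and arm_condexec_mask == 0b011 (a set bit marks an insn using the block's
   base condition).  */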
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
        return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: return ARM_NV;
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: return ARM_NV;
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: return ARM_NV;
        }

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: return ARM_NV;
        }

    case CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: return ARM_NV;
        }

    case CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
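
/* For example (illustrative only): for a comparison rtx such as
     (eq (reg:CC CC_REGNUM) (const_int 0))
   the mode is CCmode and the comparison code EQ, so the switch above
   yields ARM_EQ; arm_condition_codes[ARM_EQ] then supplies the "eq"
   suffix that is printed on the conditionalised instruction.  */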
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
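
/* A constructed example of the output above: with arm_current_cc ==
   ARM_EQ, arm_condexec_masklen == 3 and arm_condexec_mask == 0x5
   (first and third insns on the EQ sense, second inverted), buff
   becomes "tet" and the code emits

	itet	eq

   immediately before the first opcode of the block.  */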
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
      && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
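
/* Some illustrative consequences of the checks above (examples, not an
   exhaustive list): with TARGET_LDRD, a DImode value is rejected in an
   odd-numbered core register such as r1, since the doubleword would not
   start an even register pair for ldrd/strd, while r2 is fine; and
   CCmode values are only ever allowed in CC_REGNUM (or VFPCC_REGNUM
   when VFP is available).  */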
/* Implement MODES_TIEABLE_P.  */
bool
arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
	     )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
  T_MAX		/* Size of enum.  Keep last.  */
} neon_builtin_type_mode;

#define TYPE_MODE_BIT(X) (1 << (X))

#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)	\
		 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI)	\
		 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)	\
		 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)	\
		 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
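
/* Illustrative use of the masks above (not a definition used elsewhere
   in this file): a "key" type mode can be classified with, e.g.,

     (TYPE_MODE_BIT (T_V4HI) & TB_DREG) != 0

   which is true because T_V4HI is one of the 64-bit (doubleword) vector
   modes collected in TB_DREG; TB_QREG likewise collects the 128-bit
   (quadword) modes.  */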
#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v4hf_UP  T_V4HF
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI

#define UP(X) X##_UP
  NEON_LOADSTRUCTLANE,
  NEON_STORESTRUCTLANE,
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const neon_builtin_type_mode mode;
  const enum insn_code code;
  unsigned int fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  {#N, NEON_##T, UP (A), CF (N, A), 0}
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  {#N, NEON_##T, UP (J), CF (N, J), 0}
/* The NEON builtin data can be found in arm_neon_builtins.def.
   The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.  */

static neon_builtin_datum neon_builtin_data[] =
{
#include "arm_neon_builtins.def"
};
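
/* As a constructed illustration (the real entries live in
   arm_neon_builtins.def), an entry written there as
     VAR2 (BINOP, vadd, v2si, v4si)
   expands through UP and CF into the two initializers
     {"vadd", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddv2si, 0},
     {"vadd", NEON_BINOP, T_V4SI, CODE_FOR_neon_vaddv4si, 0}
   i.e. one table row per "key" mode; the trailing zero is the fcode
   field, filled in when the builtins are registered.  */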
#define CF(N,X) ARM_BUILTIN_NEON_##N##X
#define VAR1(T, N, A) \
  CF (N, A)
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  CF (N, B)
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  CF (N, C)
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  CF (N, D)
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  CF (N, E)
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  CF (N, F)
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  CF (N, G)
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  CF (N, H)
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  CF (N, I)
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  CF (N, J)
  ARM_BUILTIN_GETWCGR0,
  ARM_BUILTIN_GETWCGR1,
  ARM_BUILTIN_GETWCGR2,
  ARM_BUILTIN_GETWCGR3,

  ARM_BUILTIN_SETWCGR0,
  ARM_BUILTIN_SETWCGR1,
  ARM_BUILTIN_SETWCGR2,
  ARM_BUILTIN_SETWCGR3,

  ARM_BUILTIN_WAVG2BR,
  ARM_BUILTIN_WAVG2HR,
  ARM_BUILTIN_WAVG2B,
  ARM_BUILTIN_WAVG2H,

  ARM_BUILTIN_WMACSZ,
  ARM_BUILTIN_WMACUZ,

  ARM_BUILTIN_WSADBZ,
  ARM_BUILTIN_WSADHZ,

  ARM_BUILTIN_WALIGNI,
  ARM_BUILTIN_WALIGNR0,
  ARM_BUILTIN_WALIGNR1,
  ARM_BUILTIN_WALIGNR2,
  ARM_BUILTIN_WALIGNR3,

  ARM_BUILTIN_TMIAPH,
  ARM_BUILTIN_TMIABB,
  ARM_BUILTIN_TMIABT,
  ARM_BUILTIN_TMIATB,
  ARM_BUILTIN_TMIATT,

  ARM_BUILTIN_TMOVMSKB,
  ARM_BUILTIN_TMOVMSKH,
  ARM_BUILTIN_TMOVMSKW,

  ARM_BUILTIN_TBCSTB,
  ARM_BUILTIN_TBCSTH,
  ARM_BUILTIN_TBCSTW,

  ARM_BUILTIN_WMADDS,
  ARM_BUILTIN_WMADDU,

  ARM_BUILTIN_WPACKHSS,
  ARM_BUILTIN_WPACKWSS,
  ARM_BUILTIN_WPACKDSS,
  ARM_BUILTIN_WPACKHUS,
  ARM_BUILTIN_WPACKWUS,
  ARM_BUILTIN_WPACKDUS,

  ARM_BUILTIN_WADDSSB,
  ARM_BUILTIN_WADDSSH,
  ARM_BUILTIN_WADDSSW,
  ARM_BUILTIN_WADDUSB,
  ARM_BUILTIN_WADDUSH,
  ARM_BUILTIN_WADDUSW,

  ARM_BUILTIN_WSUBSSB,
  ARM_BUILTIN_WSUBSSH,
  ARM_BUILTIN_WSUBSSW,
  ARM_BUILTIN_WSUBUSB,
  ARM_BUILTIN_WSUBUSH,
  ARM_BUILTIN_WSUBUSW,

  ARM_BUILTIN_WCMPEQB,
  ARM_BUILTIN_WCMPEQH,
  ARM_BUILTIN_WCMPEQW,
  ARM_BUILTIN_WCMPGTUB,
  ARM_BUILTIN_WCMPGTUH,
  ARM_BUILTIN_WCMPGTUW,
  ARM_BUILTIN_WCMPGTSB,
  ARM_BUILTIN_WCMPGTSH,
  ARM_BUILTIN_WCMPGTSW,

  ARM_BUILTIN_TEXTRMSB,
  ARM_BUILTIN_TEXTRMSH,
  ARM_BUILTIN_TEXTRMSW,
  ARM_BUILTIN_TEXTRMUB,
  ARM_BUILTIN_TEXTRMUH,
  ARM_BUILTIN_TEXTRMUW,
  ARM_BUILTIN_TINSRB,
  ARM_BUILTIN_TINSRH,
  ARM_BUILTIN_TINSRW,

  ARM_BUILTIN_WMAXSW,
  ARM_BUILTIN_WMAXSH,
  ARM_BUILTIN_WMAXSB,
  ARM_BUILTIN_WMAXUW,
  ARM_BUILTIN_WMAXUH,
  ARM_BUILTIN_WMAXUB,
  ARM_BUILTIN_WMINSW,
  ARM_BUILTIN_WMINSH,
  ARM_BUILTIN_WMINSB,
  ARM_BUILTIN_WMINUW,
  ARM_BUILTIN_WMINUH,
  ARM_BUILTIN_WMINUB,

  ARM_BUILTIN_WMULUM,
  ARM_BUILTIN_WMULSM,
  ARM_BUILTIN_WMULUL,

  ARM_BUILTIN_PSADBH,
  ARM_BUILTIN_WSHUFH,

  ARM_BUILTIN_WSLLHI,
  ARM_BUILTIN_WSLLWI,
  ARM_BUILTIN_WSLLDI,
  ARM_BUILTIN_WSRAHI,
  ARM_BUILTIN_WSRAWI,
  ARM_BUILTIN_WSRADI,
  ARM_BUILTIN_WSRLHI,
  ARM_BUILTIN_WSRLWI,
  ARM_BUILTIN_WSRLDI,
  ARM_BUILTIN_WRORHI,
  ARM_BUILTIN_WRORWI,
  ARM_BUILTIN_WRORDI,

  ARM_BUILTIN_WUNPCKIHB,
  ARM_BUILTIN_WUNPCKIHH,
  ARM_BUILTIN_WUNPCKIHW,
  ARM_BUILTIN_WUNPCKILB,
  ARM_BUILTIN_WUNPCKILH,
  ARM_BUILTIN_WUNPCKILW,

  ARM_BUILTIN_WUNPCKEHSB,
  ARM_BUILTIN_WUNPCKEHSH,
  ARM_BUILTIN_WUNPCKEHSW,
  ARM_BUILTIN_WUNPCKEHUB,
  ARM_BUILTIN_WUNPCKEHUH,
  ARM_BUILTIN_WUNPCKEHUW,
  ARM_BUILTIN_WUNPCKELSB,
  ARM_BUILTIN_WUNPCKELSH,
  ARM_BUILTIN_WUNPCKELSW,
  ARM_BUILTIN_WUNPCKELUB,
  ARM_BUILTIN_WUNPCKELUH,
  ARM_BUILTIN_WUNPCKELUW,

  ARM_BUILTIN_WADDSUBHX,
  ARM_BUILTIN_WSUBADDHX,

  ARM_BUILTIN_WABSDIFFB,
  ARM_BUILTIN_WABSDIFFH,
  ARM_BUILTIN_WABSDIFFW,

  ARM_BUILTIN_WADDCH,
  ARM_BUILTIN_WADDCW,

  ARM_BUILTIN_WAVG4R,

  ARM_BUILTIN_WMADDSX,
  ARM_BUILTIN_WMADDUX,

  ARM_BUILTIN_WMADDSN,
  ARM_BUILTIN_WMADDUN,

  ARM_BUILTIN_WMULWSM,
  ARM_BUILTIN_WMULWUM,

  ARM_BUILTIN_WMULWSMR,
  ARM_BUILTIN_WMULWUMR,

  ARM_BUILTIN_WMULWL,

  ARM_BUILTIN_WMULSMR,
  ARM_BUILTIN_WMULUMR,

  ARM_BUILTIN_WQMULM,
  ARM_BUILTIN_WQMULMR,

  ARM_BUILTIN_WQMULWM,
  ARM_BUILTIN_WQMULWMR,

  ARM_BUILTIN_WADDBHUSM,
  ARM_BUILTIN_WADDBHUSL,

  ARM_BUILTIN_WQMIABB,
  ARM_BUILTIN_WQMIABT,
  ARM_BUILTIN_WQMIATB,
  ARM_BUILTIN_WQMIATT,

  ARM_BUILTIN_WQMIABBN,
  ARM_BUILTIN_WQMIABTN,
  ARM_BUILTIN_WQMIATBN,
  ARM_BUILTIN_WQMIATTN,

  ARM_BUILTIN_WMIABB,
  ARM_BUILTIN_WMIABT,
  ARM_BUILTIN_WMIATB,
  ARM_BUILTIN_WMIATT,

  ARM_BUILTIN_WMIABBN,
  ARM_BUILTIN_WMIABTN,
  ARM_BUILTIN_WMIATBN,
  ARM_BUILTIN_WMIATTN,

  ARM_BUILTIN_WMIAWBB,
  ARM_BUILTIN_WMIAWBT,
  ARM_BUILTIN_WMIAWTB,
  ARM_BUILTIN_WMIAWTT,

  ARM_BUILTIN_WMIAWBBN,
  ARM_BUILTIN_WMIAWBTN,
  ARM_BUILTIN_WMIAWTBN,
  ARM_BUILTIN_WMIAWTTN,

  ARM_BUILTIN_WMERGE,

  ARM_BUILTIN_CRC32B,
  ARM_BUILTIN_CRC32H,
  ARM_BUILTIN_CRC32W,
  ARM_BUILTIN_CRC32CB,
  ARM_BUILTIN_CRC32CH,
  ARM_BUILTIN_CRC32CW,
#define CRYPTO1(L, U, M1, M2) \
  ARM_BUILTIN_CRYPTO_##U,
#define CRYPTO2(L, U, M1, M2, M3) \
  ARM_BUILTIN_CRYPTO_##U,
#define CRYPTO3(L, U, M1, M2, M3, M4) \
  ARM_BUILTIN_CRYPTO_##U,

#include "crypto.def"

#include "arm_neon_builtins.def"

  ,ARM_BUILTIN_MAX
};

#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))

static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
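
/* A worked example of the base computation above, with made-up numbers:
   if ARM_BUILTIN_MAX were 2000 and neon_builtin_data had 800 entries,
   ARM_BUILTIN_NEON_BASE would be 1200 and the NEON builtins would
   occupy function codes 1200..1999, i.e. the last block of codes below
   ARM_BUILTIN_MAX, one code per table entry.  */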
#define NUM_DREG_TYPES 5
#define NUM_QREG_TYPES 6

static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_floatHF_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_intUTI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V4HF_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
  tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
  tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23347 and pointers to values of such types, so we can detect them later. */
23348 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23349 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23350 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23351 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23352 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23353 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23354 neon_float_type_node
= make_node (REAL_TYPE
);
23355 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23356 layout_type (neon_float_type_node
);
23357 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23358 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23359 layout_type (neon_floatHF_type_node
);
  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
					     "__builtin_neon_hf");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");
  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V4HF_type_node =
    build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
  neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_poly64");
  (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
					     "__builtin_neon_poly128");
  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");
  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);
  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
			      V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
			      V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
			      V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
			      V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
			      V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
			      V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
			      V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
			      V2DI_type_node, NULL);
  if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
    {
      tree V4USI_type_node =
	build_vector_type_for_mode (intUSI_type_node, V4SImode);

      tree V16UQI_type_node =
	build_vector_type_for_mode (intUQI_type_node, V16QImode);

      tree v16uqi_ftype_v16uqi
	= build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);

      tree v16uqi_ftype_v16uqi_v16uqi
	= build_function_type_list (V16UQI_type_node, V16UQI_type_node,
				    V16UQI_type_node, NULL_TREE);

      tree v4usi_ftype_v4usi
	= build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);

      tree v4usi_ftype_v4usi_v4usi
	= build_function_type_list (V4USI_type_node, V4USI_type_node,
				    V4USI_type_node, NULL_TREE);

      tree v4usi_ftype_v4usi_v4usi_v4usi
	= build_function_type_list (V4USI_type_node, V4USI_type_node,
				    V4USI_type_node, V4USI_type_node, NULL_TREE);

      tree uti_ftype_udi_udi
	= build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
				    intUDI_type_node, NULL_TREE);
      #define C(U) \
	ARM_BUILTIN_CRYPTO_##U
      #define N(L) \
	"__builtin_arm_crypto_"#L
      #define FT1(R, A) \
	R##_ftype_##A
      #define FT2(R, A1, A2) \
	R##_ftype_##A1##_##A2
      #define FT3(R, A1, A2, A3) \
	R##_ftype_##A1##_##A2##_##A3
      #define CRYPTO1(L, U, R, A) \
	arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
							 C (U), BUILT_IN_MD, \
							 NULL, NULL_TREE);
      #define CRYPTO2(L, U, R, A1, A2) \
	arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
							 C (U), BUILT_IN_MD, \
							 NULL, NULL_TREE);
      #define CRYPTO3(L, U, R, A1, A2, A3) \
	arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
							 C (U), BUILT_IN_MD, \
							 NULL, NULL_TREE);
      #include "crypto.def"
    }
  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;
  qreg_types[5] = neon_intUTI_type_node;

  for (i = 0; i < NUM_QREG_TYPES; i++)
    {
      int j;
      for (j = 0; j < NUM_QREG_TYPES; j++)
	{
	  if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
	    reinterp_ftype_dreg[i][j]
	      = build_function_type_list (dreg_types[i], dreg_types[j], NULL);

	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
       i < ARRAY_SIZE (neon_builtin_data);
       i++, fcode++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];

      const char* const modenames[] = {
	"v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
	"ti", "ei", "oi"
      };
      char namebuf[60];
      tree ftype = NULL;
      tree decl;
      int is_load = 0, is_store = 0;

      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);

      d->fcode = fcode;

      switch (d->itype)
	{
	case NEON_LOAD1:
	case NEON_LOAD1LANE:
	case NEON_LOADSTRUCT:
	case NEON_LOADSTRUCTLANE:
	  is_load = 1;
	  /* Fall through.  */
	case NEON_STORE1:
	case NEON_STORE1LANE:
	case NEON_STORESTRUCT:
	case NEON_STORESTRUCTLANE:
	  if (!is_load)
	    is_store = 1;
	  /* Fall through.  */
	case NEON_UNOP:
	case NEON_BINOP:
	case NEON_LOGICBINOP:
	case NEON_SHIFTINSERT:
	case NEON_TERNOP:
	case NEON_SHIFTIMM:
	case NEON_SHIFTACC:
	case NEON_LANEMULL:
	case NEON_LANEMULH:
	case NEON_SCALARMUL:
	case NEON_SCALARMULL:
	case NEON_SCALARMULH:
	case NEON_SCALARMAC:
	  {
	    int k;
	    tree return_type = void_type_node, args = void_list_node;

	    /* Build a function type directly from the insn_data for
	       this builtin.  The build_function_type() function takes
	       care of removing duplicates for us.  */
	    for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
	      {
		tree eltype;

		if (is_load && k == 1)
		  {
		    /* Neon load patterns always have the memory
		       operand in the operand 1 position.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = const_intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = const_intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = const_intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = const_float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = const_intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else if (is_store && k == 0)
		  {
		    /* Similarly, Neon store patterns use operand 0 as
		       the memory location to store to.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else
		  {
		    switch (insn_data[d->code].operand[k].mode)
		      {
		      case VOIDmode: eltype = void_type_node; break;
		      /* Scalars.  */
		      case QImode: eltype = neon_intQI_type_node; break;
		      case HImode: eltype = neon_intHI_type_node; break;
		      case SImode: eltype = neon_intSI_type_node; break;
		      case SFmode: eltype = neon_float_type_node; break;
		      case DImode: eltype = neon_intDI_type_node; break;
		      case TImode: eltype = intTI_type_node; break;
		      case EImode: eltype = intEI_type_node; break;
		      case OImode: eltype = intOI_type_node; break;
		      case CImode: eltype = intCI_type_node; break;
		      case XImode: eltype = intXI_type_node; break;
		      /* 64-bit vectors.  */
		      case V8QImode: eltype = V8QI_type_node; break;
		      case V4HImode: eltype = V4HI_type_node; break;
		      case V2SImode: eltype = V2SI_type_node; break;
		      case V2SFmode: eltype = V2SF_type_node; break;
		      /* 128-bit vectors.  */
		      case V16QImode: eltype = V16QI_type_node; break;
		      case V8HImode: eltype = V8HI_type_node; break;
		      case V4SImode: eltype = V4SI_type_node; break;
		      case V4SFmode: eltype = V4SF_type_node; break;
		      case V2DImode: eltype = V2DI_type_node; break;
		      default: gcc_unreachable ();
		      }
		  }

		if (k == 0 && !is_store)
		  return_type = eltype;
		else
		  args = tree_cons (NULL_TREE, eltype, args);
	      }

	    ftype = build_function_type (return_type, args);
	  }
	  break;

	case NEON_RESULTPAIR:
	  {
	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
	      case DImode: ftype = void_ftype_pdi_di_di; break;
	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	case NEON_REINTERP:
	  {
	    /* We iterate over NUM_DREG_TYPES doubleword types,
	       then NUM_QREG_TYPES quadword types.
	       V4HF is not a type used in reinterpret, so we translate
	       d->mode to the correct index in reinterp_ftype_dreg.  */
	    bool qreg_p
	      = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
	    int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
		      % NUM_QREG_TYPES;
	    switch (insn_data[d->code].operand[0].mode)
	      {
	      case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
	      case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
	      case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
	      case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
	      case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
	      case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
	      case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
	      case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
	      case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
	      case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
	      case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	case NEON_FLOAT_WIDEN:
	  {
	    tree eltype = NULL_TREE;
	    tree return_type = NULL_TREE;

	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V4HFmode:
		eltype = V4HF_type_node;
		return_type = V4SF_type_node;
		break;
	      default: gcc_unreachable ();
	      }

	    ftype = build_function_type_list (return_type, eltype, NULL);
	  }
	  break;

	case NEON_FLOAT_NARROW:
	  {
	    tree eltype = NULL_TREE;
	    tree return_type = NULL_TREE;

	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V4SFmode:
		eltype = V4SF_type_node;
		return_type = V4HF_type_node;
		break;
	      default: gcc_unreachable ();
	      }

	    ftype = build_function_type_list (return_type, eltype, NULL);
	  }
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (ftype != NULL);

      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);

      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
				   NULL_TREE);
      arm_builtin_decls[fcode] = decl;
    }
}

#undef NUM_DREG_TYPES
#undef NUM_QREG_TYPES
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
					BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)

struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN(code, string, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },
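
/* For reference (a constructed expansion, not extra table content), the
   first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   so each line registers one two-operand iWMMXt builtin keyed off its
   insn pattern.  */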
  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
  IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
  IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
  IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
  IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
  IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
  IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
  IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
  IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN2(code, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
#define CRC32_BUILTIN(L, U) \
  {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
   UNKNOWN, 0},
   CRC32_BUILTIN (crc32b, CRC32B)
   CRC32_BUILTIN (crc32h, CRC32H)
   CRC32_BUILTIN (crc32w, CRC32W)
   CRC32_BUILTIN (crc32cb, CRC32CB)
   CRC32_BUILTIN (crc32ch, CRC32CH)
   CRC32_BUILTIN (crc32cw, CRC32CW)
#undef CRC32_BUILTIN

#define CRYPTO_BUILTIN(L, U) \
  {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
   UNKNOWN, 0},
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
#define CRYPTO1(L, U, R, A)
#define CRYPTO3(L, U, R, A1, A2, A3)
#include "crypto.def"

#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
  IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
  IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
  IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
  IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
  IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
  IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)

#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
#define CRYPTO2(L, U, R, A1, A2)
#define CRYPTO3(L, U, R, A1, A2, A3)
#include "crypto.def"

#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};
static const struct builtin_description bdesc_3arg[] =
{
#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
#define CRYPTO1(L, U, R, A)
#define CRYPTO2(L, U, R, A1, A2)
#include "crypto.def"

#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};
#undef CRYPTO_BUILTIN
/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_di_di
    = build_function_type_list (V2SI_type_node,
				long_long_integer_type_node,
				long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_di_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node, NULL_TREE);
  tree di_ftype_di_int_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree int_ftype_v4hi
    = build_function_type_list (integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree int_ftype_v2si
    = build_function_type_list (integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree int_ftype_v8qi_int
    = build_function_type_list (integer_type_node,
				V8QI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v2si_int
    = build_function_type_list (integer_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_int_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
24166 /* Miscellaneous. */
24167 tree v8qi_ftype_v4hi_v4hi
24168 = build_function_type_list (V8QI_type_node
,
24169 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24170 tree v4hi_ftype_v2si_v2si
24171 = build_function_type_list (V4HI_type_node
,
24172 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24173 tree v8qi_ftype_v4hi_v8qi
24174 = build_function_type_list (V8QI_type_node
,
24175 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24176 tree v2si_ftype_v4hi_v4hi
24177 = build_function_type_list (V2SI_type_node
,
24178 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24179 tree v2si_ftype_v8qi_v8qi
24180 = build_function_type_list (V2SI_type_node
,
24181 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24182 tree v4hi_ftype_v4hi_di
24183 = build_function_type_list (V4HI_type_node
,
24184 V4HI_type_node
, long_long_integer_type_node
,
24186 tree v2si_ftype_v2si_di
24187 = build_function_type_list (V2SI_type_node
,
24188 V2SI_type_node
, long_long_integer_type_node
,
24191 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24192 tree int_ftype_void
24193 = build_function_type_list (integer_type_node
, NULL_TREE
);
24195 = build_function_type_list (long_long_integer_type_node
,
24196 V8QI_type_node
, NULL_TREE
);
24198 = build_function_type_list (long_long_integer_type_node
,
24199 V4HI_type_node
, NULL_TREE
);
24201 = build_function_type_list (long_long_integer_type_node
,
24202 V2SI_type_node
, NULL_TREE
);
24203 tree v2si_ftype_v4hi
24204 = build_function_type_list (V2SI_type_node
,
24205 V4HI_type_node
, NULL_TREE
);
24206 tree v4hi_ftype_v8qi
24207 = build_function_type_list (V4HI_type_node
,
24208 V8QI_type_node
, NULL_TREE
);
24209 tree v8qi_ftype_v8qi
24210 = build_function_type_list (V8QI_type_node
,
24211 V8QI_type_node
, NULL_TREE
);
24212 tree v4hi_ftype_v4hi
24213 = build_function_type_list (V4HI_type_node
,
24214 V4HI_type_node
, NULL_TREE
);
24215 tree v2si_ftype_v2si
24216 = build_function_type_list (V2SI_type_node
,
24217 V2SI_type_node
, NULL_TREE
);
24219 tree di_ftype_di_v4hi_v4hi
24220 = build_function_type_list (long_long_unsigned_type_node
,
24221 long_long_unsigned_type_node
,
24222 V4HI_type_node
, V4HI_type_node
,
24225 tree di_ftype_v4hi_v4hi
24226 = build_function_type_list (long_long_unsigned_type_node
,
24227 V4HI_type_node
,V4HI_type_node
,
24230 tree v2si_ftype_v2si_v4hi_v4hi
24231 = build_function_type_list (V2SI_type_node
,
24232 V2SI_type_node
, V4HI_type_node
,
24233 V4HI_type_node
, NULL_TREE
);
24235 tree v2si_ftype_v2si_v8qi_v8qi
24236 = build_function_type_list (V2SI_type_node
,
24237 V2SI_type_node
, V8QI_type_node
,
24238 V8QI_type_node
, NULL_TREE
);
24240 tree di_ftype_di_v2si_v2si
24241 = build_function_type_list (long_long_unsigned_type_node
,
24242 long_long_unsigned_type_node
,
24243 V2SI_type_node
, V2SI_type_node
,
24246 tree di_ftype_di_di_int
24247 = build_function_type_list (long_long_unsigned_type_node
,
24248 long_long_unsigned_type_node
,
24249 long_long_unsigned_type_node
,
24250 integer_type_node
, NULL_TREE
);
24252 tree void_ftype_int
24253 = build_function_type_list (void_type_node
,
24254 integer_type_node
, NULL_TREE
);
24256 tree v8qi_ftype_char
24257 = build_function_type_list (V8QI_type_node
,
24258 signed_char_type_node
, NULL_TREE
);
24260 tree v4hi_ftype_short
24261 = build_function_type_list (V4HI_type_node
,
24262 short_integer_type_node
, NULL_TREE
);
24264 tree v2si_ftype_int
24265 = build_function_type_list (V2SI_type_node
,
24266 integer_type_node
, NULL_TREE
);
24268 /* Normal vector binops. */
24269 tree v8qi_ftype_v8qi_v8qi
24270 = build_function_type_list (V8QI_type_node
,
24271 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24272 tree v4hi_ftype_v4hi_v4hi
24273 = build_function_type_list (V4HI_type_node
,
24274 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24275 tree v2si_ftype_v2si_v2si
24276 = build_function_type_list (V2SI_type_node
,
24277 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24278 tree di_ftype_di_di
24279 = build_function_type_list (long_long_unsigned_type_node
,
24280 long_long_unsigned_type_node
,
24281 long_long_unsigned_type_node
,
24284 /* Add all builtins that are more or less simple operations on two
24286 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24288 /* Use one of the operands; the target can have a different mode for
24289 mask-generating compares. */
24290 enum machine_mode mode
;
24293 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24296 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24301 type
= v8qi_ftype_v8qi_v8qi
;
24304 type
= v4hi_ftype_v4hi_v4hi
;
24307 type
= v2si_ftype_v2si_v2si
;
24310 type
= di_ftype_di_di
;
24314 gcc_unreachable ();
24317 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24320 /* Add the remaining MMX insns with somewhat more complicated types. */
24321 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24322 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24323 ARM_BUILTIN_ ## CODE)
24325 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24326 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24327 ARM_BUILTIN_ ## CODE)
24329 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24330 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24331 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24332 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24333 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24334 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24335 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24336 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24337 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24339 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24340 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24341 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24342 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24343 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24344 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24346 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24347 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24348 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24349 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24350 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24351 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24353 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24354 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24355 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24356 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24357 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24358 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24360 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24361 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24362 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24363 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24364 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24365 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24367 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24369 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24370 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24371 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24372 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24373 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24374 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24375 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24376 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24377 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24378 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24380 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24381 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24382 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24383 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24384 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24385 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24386 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24387 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24388 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24390 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24391 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24392 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24394 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24395 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24396 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24398 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24399 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24401 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24402 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24403 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24404 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24405 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24406 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24408 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24409 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24410 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24411 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24412 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24413 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24414 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24415 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24416 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24417 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24418 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24419 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24421 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24422 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24423 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24424 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24426 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24427 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24428 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24429 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24430 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24431 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24432 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24434 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24435 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24436 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24438 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24439 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24440 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24441 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24443 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24444 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24445 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24446 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24448 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24449 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24450 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24451 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24453 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24454 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24455 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24456 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24458 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24459 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24460 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24461 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24463 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24464 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24465 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24466 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24468 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24470 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24471 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24472 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24474 #undef iwmmx_mbuiltin
24475 #undef iwmmx2_mbuiltin
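
/* Illustration of the resulting user-level view (a sketch, not part of this
   file): after the registrations above, code compiled for an iWMMXt target
   can call, e.g.,

     long long acc = __builtin_arm_wzero ();
     int m = __builtin_arm_tmovmskb (v);     // v: a hypothetical V8QI value

   where the "__builtin_arm_" prefix is glued on by the iwmmx_mbuiltin and
   iwmmx2_mbuiltin macros.  */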
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
static void
arm_init_crc32_builtins ()
{
  tree si_ftype_si_qi
    = build_function_type_list (unsigned_intSI_type_node,
				unsigned_intSI_type_node,
				unsigned_intQI_type_node, NULL_TREE);
  tree si_ftype_si_hi
    = build_function_type_list (unsigned_intSI_type_node,
				unsigned_intSI_type_node,
				unsigned_intHI_type_node, NULL_TREE);
  tree si_ftype_si_si
    = build_function_type_list (unsigned_intSI_type_node,
				unsigned_intSI_type_node,
				unsigned_intSI_type_node, NULL_TREE);

  arm_builtin_decls[ARM_BUILTIN_CRC32B]
    = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
			    ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32H]
    = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
			    ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32W]
    = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
			    ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32CB]
    = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
			    ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32CH]
    = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
			    ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
  arm_builtin_decls[ARM_BUILTIN_CRC32CW]
    = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
			    ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
}
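
/* Usage sketch (assumes a target with the CRC32 extension enabled, e.g.
   -march=armv8-a+crc):

     unsigned int step (unsigned int crc, unsigned char byte)
     {
       return __builtin_arm_crc32b (crc, byte);
     }

   Each of the six builtins maps one-to-one onto the corresponding CRC32
   instruction.  */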
static void
arm_init_builtins (void)
{
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();

  if (TARGET_CRC32)
    arm_init_crc32_builtins ();
}
/* Return the ARM builtin for CODE.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
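
/* For example, given the hook above, the front end treats

     __fp16 a, b;
     ... a + b ...

   as (float) a + (float) b: __fp16 values are always promoted to float for
   arithmetic, much like the traditional default promotion of float
   arguments to double.  */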
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
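
/* Worked example of the double-hop rule above: converting a double D to
   __fp16 is performed as

     (__fp16) (float) D

   i.e. with an intermediate rounding to float, as the ARM half-precision
   semantics require; a single direct rounding from double could yield a
   differently-rounded result.  */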
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Function to expand ternary builtins.  */

static rtx
arm_expand_ternop_builtin (enum insn_code icode,
			   tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);

  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = NULL_RTX;

  /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
     lane operand depending on endianness.  */
  bool builtin_sha1cpm_p = false;

  if (insn_data[icode].n_operands == 5)
    {
      gcc_assert (icode == CODE_FOR_crypto_sha1c
		  || icode == CODE_FOR_crypto_sha1p
		  || icode == CODE_FOR_crypto_sha1m);
      builtin_sha1cpm_p = true;
    }
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
  enum machine_mode mode2 = insn_data[icode].operand[3].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);
  if (VECTOR_MODE_P (mode2))
    op2 = safe_vector_operand (op2, mode2);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
	      && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);
  if (builtin_sha1cpm_p)
    op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);

  if (builtin_sha1cpm_p)
    pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
  else
    pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  rtx op1 = NULL_RTX;
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  bool builtin_sha1h_p = false;

  if (insn_data[icode].n_operands == 3)
    {
      gcc_assert (icode == CODE_FOR_crypto_sha1h);
      builtin_sha1h_p = true;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }
  if (builtin_sha1h_p)
    op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);

  if (builtin_sha1h_p)
    pat = GEN_FCN (icode) (target, op0, op1);
  else
    pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
   and return an expression for the accessed memory.

   The intrinsic function operates on a block of registers that has
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.  The
   function references the memory at EXP of type TYPE and in mode
   MEM_MODE; this mode may be BLKmode if no more suitable mode is
   available.  */

static tree
neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
			  enum machine_mode reg_mode,
			  neon_builtin_type_mode type_mode)
{
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
  tree elem_type, upper_bound, array_type;

  /* Work out the size of the register block in bytes.  */
  reg_size = GET_MODE_SIZE (reg_mode);

  /* Work out the size of each vector in bytes.  */
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);

  /* Work out how many vectors there are.  */
  gcc_assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;

  /* Work out the type of each element.  */
  gcc_assert (POINTER_TYPE_P (type));
  elem_type = TREE_TYPE (type);

  /* Work out how many elements are being loaded or stored.
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
     and memory elements; anything else implies a lane load or store.  */
  if (mem_mode == reg_mode)
    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
  else
    nelems = nvectors;

  /* Create a type that describes the full access.  */
  upper_bound = build_int_cst (size_type_node, nelems - 1);
  array_type = build_array_type (elem_type, build_index_type (upper_bound));

  /* Dereference EXP using that type.  */
  return fold_build2 (MEM_REF, array_type, exp,
		      build_int_cst (build_pointer_type (array_type), 0));
}
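
/* Worked example (a sketch): for a plain quad-vector load in the style of
   vld1q_u32, REG_MODE is the 16-byte quad-register mode and TYPE_MODE is a
   TB_QREG mode, so vector_size is 16 and nvectors is 1; with a 4-byte
   element type this gives nelems == 4, and the access is described as a
   four-element array at EXP.  For a lane access (MEM_MODE != REG_MODE),
   nelems is instead the number of vectors, one element per vector.  */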
/* Expand a Neon builtin.  */

static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, int fcode, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  tree arg_type;
  tree formals;
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode other_mode;
  int argc = 0;
  int opno;

  if (have_retval
      && (target == 0
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, fcode);

  formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  opno = argc + have_retval;
	  mode[argc] = insn_data[icode].operand[opno].mode;
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  arg_type = TREE_VALUE (formals);
	  if (thisarg == NEON_ARG_MEMORY)
	    {
	      other_mode = insn_data[icode].operand[1 - opno].mode;
	      arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
						    mode[argc], other_mode,
						    type_mode);
	    }

	  /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
	     is returned.  */
	  op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
				  (thisarg == NEON_ARG_MEMORY
				   ? EXPAND_MEMORY : EXPAND_NORMAL));

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_MEMORY:
	      /* Check if expand failed.  */
	      if (op[argc] == const0_rtx)
		return 0;
	      gcc_assert (MEM_P (op[argc]));
	      PUT_MODE (op[argc], mode[argc]);
	      /* ??? arm_neon.h uses the same built-in functions for signed
		 and unsigned accesses, casting where necessary.  This isn't
		 alias safe.  */
	      set_mem_alias_set (op[argc], 0);
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		op[argc] = (replace_equiv_address
			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	  formals = TREE_CHAIN (formals);
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */

static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
  neon_itype itype = d->itype;
  enum insn_code icode = d->code;
  neon_builtin_type_mode type_mode = d->mode;

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_RINT:
    case NEON_SPLIT:
    case NEON_FLOAT_WIDEN:
    case NEON_FLOAT_NARROW:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */

void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
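
/* E.g. the vreinterpret* intrinsics in arm_neon.h reduce to this: viewing a
   V8QI register as V4HI emits one register-to-register move (normally
   eliminated by the register allocator) and never a value conversion.  */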
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */

void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, op2, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */

void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
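
/* Example (a sketch): copying the pair {d0,d1} into {d1,d2} must move d1
   into d2 before d0 overwrites d1, so the components are emitted in
   descending order; copying {d1,d2} into {d0,d1} is safe in ascending
   order.  The REGNO comparison above picks whichever direction never
   clobbers a source component that has not yet been copied.  */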
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
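
/* Example (a sketch): a vcombine of d0 and d1 into q0 is already a no-op
   (q0 is exactly the pair {d0,d1}), so only a deleted-insn note is emitted;
   combining d1 and d0 into q0 hits the reversed special case above and
   becomes a single VSWP instead of two moves through a scratch.  */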
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;
  int opint;
  int selector;
  int mask;
  int imm;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}

      opint = INTVAL (op1);
      if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
	{
	  if (opint > 7 || opint < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
	{
	  if (opint > 3 || opint < 0)
	    error ("the range of selector should be in 0 to 3");
	}
      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
	{
	  if (opint > 1 || opint < 0)
	    error ("the range of selector should be in 0 to 1");
	}

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WALIGNI:
      /* If op2 is immediate, call waligni, else call walignr.  */
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (CONST_INT_P (op2))
	{
	  icode = CODE_FOR_iwmmxt_waligni;
	  tmode = insn_data[icode].operand[0].mode;
	  mode0 = insn_data[icode].operand[1].mode;
	  mode1 = insn_data[icode].operand[2].mode;
	  mode2 = insn_data[icode].operand[3].mode;
	  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	  gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
	  selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      else
	{
	  icode = CODE_FOR_iwmmxt_walignr;
	  tmode = insn_data[icode].operand[0].mode;
	  mode0 = insn_data[icode].operand[1].mode;
	  mode1 = insn_data[icode].operand[2].mode;
	  mode2 = insn_data[icode].operand[3].mode;
	  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	  if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
	    op2 = copy_to_mode_reg (mode2, op2);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
    case ARM_BUILTIN_WMERGE:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (icode == CODE_FOR_iwmmxt_wmerge)
	{
	  selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      if ((icode == CODE_FOR_iwmmxt_tinsrb)
	  || (icode == CODE_FOR_iwmmxt_tinsrh)
	  || (icode == CODE_FOR_iwmmxt_tinsrw))
	{
	  mask = 0x01;
	  selector = INTVAL (op2);
	  if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
	    error ("the range of selector should be in 0 to 7");
	  else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
	    error ("the range of selector should be in 0 to 3");
	  else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
	    error ("the range of selector should be in 0 to 1");
	  mask <<= selector;
	  op2 = GEN_INT (mask);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_SETWCGR0:
    case ARM_BUILTIN_SETWCGR1:
    case ARM_BUILTIN_SETWCGR2:
    case ARM_BUILTIN_SETWCGR3:
      icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
	       : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
	       : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
	       : CODE_FOR_iwmmxt_setwcgr3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      pat = GEN_FCN (icode) (op0);
      if (!pat)
	return 0;
      emit_insn (pat);
      return 0;

    case ARM_BUILTIN_GETWCGR0:
    case ARM_BUILTIN_GETWCGR1:
    case ARM_BUILTIN_GETWCGR2:
    case ARM_BUILTIN_GETWCGR3:
      icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
	       : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
	       : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
	       : CODE_FOR_iwmmxt_getwcgr3);
      tmode = insn_data[icode].operand[0].mode;
      if (target == 0
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      selector = INTVAL (op1);
      if (selector < 0 || selector > 255)
	error ("the range of mask should be in 0 to 255");
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WMADDS:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
    case ARM_BUILTIN_WMADDSX:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
    case ARM_BUILTIN_WMADDSN:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
    case ARM_BUILTIN_WMADDU:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
    case ARM_BUILTIN_WMADDUX:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
    case ARM_BUILTIN_WMADDUN:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);

      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
    case ARM_BUILTIN_WQMIABB:
    case ARM_BUILTIN_WQMIABT:
    case ARM_BUILTIN_WQMIATB:
    case ARM_BUILTIN_WQMIATT:
    case ARM_BUILTIN_WQMIABBN:
    case ARM_BUILTIN_WQMIABTN:
    case ARM_BUILTIN_WQMIATBN:
    case ARM_BUILTIN_WQMIATTN:
    case ARM_BUILTIN_WMIABB:
    case ARM_BUILTIN_WMIABT:
    case ARM_BUILTIN_WMIATB:
    case ARM_BUILTIN_WMIATT:
    case ARM_BUILTIN_WMIABBN:
    case ARM_BUILTIN_WMIABTN:
    case ARM_BUILTIN_WMIATBN:
    case ARM_BUILTIN_WMIATTN:
    case ARM_BUILTIN_WMIAWBB:
    case ARM_BUILTIN_WMIAWBT:
    case ARM_BUILTIN_WMIAWTB:
    case ARM_BUILTIN_WMIAWTT:
    case ARM_BUILTIN_WMIAWBBN:
    case ARM_BUILTIN_WMIAWBTN:
    case ARM_BUILTIN_WMIAWTBN:
    case ARM_BUILTIN_WMIAWTTN:
    case ARM_BUILTIN_WSADB:
    case ARM_BUILTIN_WSADH:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
	       : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
	       : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
	       : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
	       : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
	       : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
	       : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
	       : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
	       : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
	       : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
	       : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
	       : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
	       : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
	       : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
	       : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
	       : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
	       : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
	       : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
	       : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
	       : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
	       : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
	       : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
	       : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
	       : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
	       : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
	       : CODE_FOR_iwmmxt_wsadh);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_WSRLHI:
    case ARM_BUILTIN_WSRLWI:
    case ARM_BUILTIN_WSRLDI:
    case ARM_BUILTIN_WSLLHI:
    case ARM_BUILTIN_WSLLWI:
    case ARM_BUILTIN_WSLLDI:
    case ARM_BUILTIN_WSRAHI:
    case ARM_BUILTIN_WSRAWI:
    case ARM_BUILTIN_WSRADI:
    case ARM_BUILTIN_WRORHI:
    case ARM_BUILTIN_WRORWI:
    case ARM_BUILTIN_WRORDI:
    case ARM_BUILTIN_WSRLH:
    case ARM_BUILTIN_WSRLW:
    case ARM_BUILTIN_WSRLD:
    case ARM_BUILTIN_WSLLH:
    case ARM_BUILTIN_WSLLW:
    case ARM_BUILTIN_WSLLD:
    case ARM_BUILTIN_WSRAH:
    case ARM_BUILTIN_WSRAW:
    case ARM_BUILTIN_WSRAD:
    case ARM_BUILTIN_WRORH:
    case ARM_BUILTIN_WRORW:
    case ARM_BUILTIN_WRORD:
      icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
	       : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
	       : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
	       : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
	       : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
	       : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
	       : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
	       : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
	       : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
	       : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
	       : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
	       : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
	       : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
	       : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
	       : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
	       : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
	       : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
	       : CODE_FOR_nothing);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op1 = expand_normal (arg1);
      if (GET_MODE (op1) == VOIDmode)
	{
	  imm = INTVAL (op1);
	  if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
	       || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
	      && (imm < 0 || imm > 32))
	    {
	      if (fcode == ARM_BUILTIN_WRORHI)
		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_rori_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WRORWI)
		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_rori_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WRORH)
		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_ror_pi16 in code.");
	      else
		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_ror_pi32 in code.");
	    }
	  else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
		   && (imm < 0 || imm > 64))
	    {
	      if (fcode == ARM_BUILTIN_WRORDI)
		error ("the range of count should be in 0 to 64.  please check the intrinsic _mm_rori_si64 in code.");
	      else
		error ("the range of count should be in 0 to 64.  please check the intrinsic _mm_ror_si64 in code.");
	    }
	  else if (imm < 0)
	    {
	      if (fcode == ARM_BUILTIN_WSRLHI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srli_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLWI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srli_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLDI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srli_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLHI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_slli_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLWI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_slli_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLDI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_slli_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAHI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srai_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAWI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srai_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSRADI)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srai_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLH)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srl_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLW)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srl_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLD)
		error ("the count should be no less than 0.  please check the intrinsic _mm_srl_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLH)
		error ("the count should be no less than 0.  please check the intrinsic _mm_sll_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLW)
		error ("the count should be no less than 0.  please check the intrinsic _mm_sll_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLD)
		error ("the count should be no less than 0.  please check the intrinsic _mm_sll_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAH)
		error ("the count should be no less than 0.  please check the intrinsic _mm_sra_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAW)
		error ("the count should be no less than 0.  please check the intrinsic _mm_sra_pi32 in code.");
	      else
		error ("the count should be no less than 0.  please check the intrinsic _mm_sra_si64 in code.");
	    }
	}
      return arm_expand_binop_builtin (icode, exp, target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_ternop_builtin (d->icode, exp, target);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
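
/* E.g. number_of_first_bit_set (0x30) == 4: bits are counted from the least
   significant end, so a mask with bits 4 and 5 set reports bit 4.  */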
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg, insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
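/* Illustrative sketch (not from the original source): a prologue that
   saves r4, r5 and lr would do roughly

     insn = thumb1_emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM),
					(1 << 4) | (1 << 5) | (1 << LR_REGNUM));
     RTX_FRAME_RELATED_P (insn) = 1;

   which emits a single "push {r4, r5, lr}" and describes all three
   registers, plus the 12-byte stack adjustment, in the unwind info.  */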
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  enum machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
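/* Worked example (illustrative): for a void function under
   interworking (so the pop-into-PC shortcut above is unavailable)
   with the return address left on the stack
   (reg_containing_return_addr == -1) and no backtrace structure,
   regs_to_pop is just the LR bit and r0-r2 are free; the matching
   loop settles on r0, and the emitted sequence is simply

	pop	{r0}
	bx	r0  */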
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if an unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change the stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision of not using far jumps.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump && ((func_size * 3) >= 2048))
    {
      /* Record the fact that we have decided that
	 the function does use far jumps.  */
      cfun->machine->far_jump_used = 1;
      return 1;
    }

  return 0;
}
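/* To see where the 2048/3 bound comes from: in the worst case each
   2-byte insn drags a 4-byte constant-pool entry along with it, so a
   function with FUNC_SIZE bytes of code can span up to 3 * FUNC_SIZE
   bytes in total.  Requiring func_size * 3 >= 2048 before committing
   to far jumps therefore keeps every branch within the +/-2048-byte
   reach of a Thumb-1 long branch whenever we decide against them.  */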
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_DECLARE_FUNCTION_SIZE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
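/* Numeric example: with a 516-byte frame and two free low registers,
   516 >= 512 and 516 - 2*4 = 508 < 512, so the function returns
   (516 - 508) / 4 = 2: pushing two dummy registers shrinks the
   explicit stack adjustment to 508 bytes, which fits the immediate
   field of a single "sub sp" instruction.  */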
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
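/* Illustrative sequence: if the prologue stashed r8 and r9 on the
   stack via low-register copies, the loop above restores them with
   something like

	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3  */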
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}

/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */
void
thumb1_expand_prologue (void)
{
  rtx insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16	    Reserve space for 4 registers.
	 2     push  {R7}	    Push low registers.
	 4     add   R7, SP, #20    Get the stack pointer before the push.
	 6     str   R7, [SP, #8]   Store the stack pointer
				    (before reserving the space).
	 8     mov   R7, PC	    Get hold of the start of this code + 12.
	 10    str   R7, [SP, #16]  Store it.
	 12    mov   R7, FP	    Get hold of the current frame pointer.
	 14    str   R7, [SP, #4]   Store it.
	 16    mov   R7, LR	    Get hold of the current return address.
	 18    str   R7, [SP, #12]  Store it.
	 20    add   R7, SP, #16    Point at the start of the
				    backtrace structure.
	 22    mov   FP, R7	    Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers, since such a stash could clobber
	 arguments still held in those registers.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
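/* Putting it together (illustrative): a Thumb-1 function that saves
   r4 and lr and needs a 1024-byte frame might get a prologue of the
   form

	push	{r4, lr}
	ldr	r4, .Lk		@ .Lk: .word -1024
	add	sp, sp, r4

   where r4 is the call-saved scratch register located by the loop
   above; it was pushed first, so clobbering it here is safe.  */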
/* Generate pattern *pop_multiple_with_stack_update_and_return if a single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are being returned.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |=  (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      emit_jump_insn (simple_return_rtx);
    }
}
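/* E.g. a function whose epilogue would otherwise be "pop {r4, lr};
   bx lr" instead folds the return into the pop by substituting PC for
   LR in the mask, yielding "pop {r4, pc}".  */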
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers are saved and how far away the floats
     are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
							gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc == true)
	return;
    }

  if (crtl->args.pretend_args_size)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (crtl->args.pretend_args_size)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue for how
	     pretend_args are saved on the stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg>.  */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base   = REGNO (base);
	  int reg_dest   = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
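/* E.g. for a double-word destination of r2/r3 with the base address
   already in r2, the REG case above orders the loads as

	ldr	r3, [r2, #4]
	ldr	r2, [r2]

   so that the base register is not clobbered before the high word
   has been read.  */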
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      if (REGNO (operands[5]) > REGNO (operands[6]))
	{
	  tmp = operands[5];
	  operands[5] = operands[6];
	  operands[6] = tmp;
	}
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
      ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
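/* For illustration (not part of the original source; the encoded name is
   hypothetical): given "*foo", the '*' in the prefix forces "foo" to be
   emitted verbatim; without it, the %U directive prepends the user-label
   prefix (e.g. producing "_foo") when leading underscores are in use.  */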
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    {
	      const char* pos = strchr (arm_selected_arch->name, '+');
	      if (pos)
		{
		  char buf[15];
		  gcc_assert (strlen (arm_selected_arch->name)
			      <= sizeof (buf) / sizeof (*pos));
		  strncpy (buf, arm_selected_arch->name,
			   (pos - arm_selected_arch->name) * sizeof (*pos));
		  buf[pos - arm_selected_arch->name] = '\0';
		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
		}
	      else
		asm_fprintf (asm_out_file, "\t.arch %s\n",
			     arm_selected_arch->name);
	    }
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (TARGET_SOFT_FLOAT)
	{
	  fpu_name = "softvfp";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}

      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
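/* For illustration (not part of the original source; the exact set of
   directives depends on the selected CPU/FPU and the flags tested above):
   a typical EABI file header produced by arm_file_start looks like
	.syntax unified
	.arch armv7-a
	.fpu vfpv3-d16
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2  */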
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  tem = plus_constant (GET_MODE (tem), tem, -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
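/* For illustration (not part of the original source): on a little-endian
   target the HFmode constant 1.0 (bit pattern 0x3c00) is emitted as a
   two-byte integer followed by two bytes of zero padding, so an SImode
   ldr of the pool entry places the half-float bits in the low half-word;
   big-endian targets pad first instead.  */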
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}

/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (enum machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static enum machine_mode
arm_preferred_simd_mode (enum machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
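/* For illustration (not part of the original source): a mask of 255 tells
   the middle-end that "x << (n & 255)" may be simplified to "x << n" for
   SImode, matching the behaviour of the shift-by-register instructions;
   the 0 returned for other modes promises nothing about out-of-range
   counts.  */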
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
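/* For illustration (not part of the original source): s1
   (regno FIRST_VFP_REGNUM + 1) maps to DWARF register 65 under the legacy
   single-precision range, while d16 (the first double-only register, at
   regno FIRST_VFP_REGNUM + 32) maps to 256 + 16 = 272.  */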
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  enum machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	gcc_assert (REG_P (XEXP (e, 0))
		    && REGNO (XEXP (e, 0)) == SP_REGNUM
		    && CONST_INT_P (XEXP (e, 1))
		    && offset == INTVAL (XEXP (e, 1)));
      else
	gcc_assert (i == 1
		    && REG_P (e)
		    && REGNO (e) == SP_REGNUM);
      offset += reg_size;
#endif
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
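/* For illustration (not part of the original source): each function body
   is bracketed as
	.fnstart
	...
	.cantunwind	@ only when the nothrow condition above holds
	.fnend
   and these directives are what instruct the assembler to build the EABI
   unwind tables.  */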
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return true;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT(val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  enum machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
	break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
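/* For illustration (not part of the original source; operand numbers stand
   in for the actual registers and labels): for a HImode dispatch table the
   sequence emitted above is
	cmp	%0, %1
	bhi	%l3
	tbh	[pc, %0, lsl #1]
   i.e. a bounds check followed by a table-branch through the half-word
   offset table that follows.  */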
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V4HFmode,  "__builtin_neon_hf",     "18__simd64_float16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },

  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
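/* For illustration (not part of the original source): a NEON float32x4_t,
   which has V4SFmode and element type __builtin_neon_sf, matches the
   128-bit table entry above and so mangles as "19__simd128_float32_t".  */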
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
static tree
arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
   decl of the vectorized builtin for the appropriate vector mode.
   NULL_TREE is returned if no such builtin is available.  */
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C) \
  (out_mode == SFmode && out_n == C \
   && in_mode == SFmode && in_n == C)

#undef ARM_FIND_VRINT_VARIANT
#define ARM_FIND_VRINT_VARIANT(N) \
  (ARM_CHECK_BUILTIN_MODE (2) \
    ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
    : (ARM_CHECK_BUILTIN_MODE (4) \
      ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
      : NULL_TREE))

  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
    {
      enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
      switch (fn)
	{
	case BUILT_IN_FLOORF:
	  return ARM_FIND_VRINT_VARIANT (vrintm);
	case BUILT_IN_CEILF:
	  return ARM_FIND_VRINT_VARIANT (vrintp);
	case BUILT_IN_TRUNCF:
	  return ARM_FIND_VRINT_VARIANT (vrintz);
	case BUILT_IN_ROUNDF:
	  return ARM_FIND_VRINT_VARIANT (vrinta);
	default:
	  return NULL_TREE;
	}
    }
  return NULL_TREE;
}
#undef ARM_CHECK_BUILTIN_MODE
#undef ARM_FIND_VRINT_VARIANT
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}

int
vfp3_const_double_for_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_truncate (DFmode, &r0))
    {
      HOST_WIDE_INT value = real_to_integer (&r0);
      value = value & 0xffffffff;
      if ((value != 0) && ( (value & (value - 1)) == 0))
	return int_log2 (value);
    }

  return 0;
}
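/* For illustration (not part of the original source): for the constant
   0.25, exact_real_inverse yields 4.0, which truncates exactly to the
   power of two 4, so vfp3_const_double_for_fract_bits returns
   int_log2 (4) = 2, the number of fractional bits for the fixed-point
   conversion instructions.  */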
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
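/* For illustration (not part of the original source): REG_BR_PROB_BASE is
   10000, so the note records a taken probability of 99/10000, steering the
   block-reordering passes to keep the fall-through (non-retry) path of the
   atomic loops below hot.  */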
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
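/* For illustration (not part of the original source; register choices are
   schematic): after splitting, a strong SImode compare-and-swap becomes a
   load-exclusive/store-exclusive loop of roughly this shape:
	1:	ldrex	r0, [r2]
		cmp	r0, r3
		bne	2f
		strex	r1, r4, [r2]
		teq	r1, #0
		bne	1b
	2:
   with barriers, or ldaex/stlex variants on targets with LDACQ, inserted
   according to the memory model.  */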
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  bool use_acquire = TARGET_HAVE_LDACQ
                     && !(model == MEMMODEL_RELAXED
                          || model == MEMMODEL_CONSUME
                          || model == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
                     && !(model == MEMMODEL_RELAXED
                          || model == MEMMODEL_CONSUME
                          || model == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
        {
          /* DImode plus/minus need to clobber flags.  */
          /* The adddi3 and subdi3 patterns are incorrectly written so that
             they require matching operands, even when we could easily support
             three operands.  Thankfully, this can be fixed up post-splitting,
             as the individual add+adc patterns do accept three operands and
             post-reload cprop can make these moves go away.  */
          emit_move_insn (new_out, old_out);
          if (code == PLUS)
            x = gen_adddi3 (new_out, new_out, value);
          else
            x = gen_subdi3 (new_out, new_out, value);
          emit_insn (x);
          break;
        }
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
                            use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
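
/* An illustrative sketch of the emitted sequence (simplified): for a
   sequentially consistent SImode fetch-and-add on a core without
   LDA/STL, the loop above becomes roughly

        dmb     ish                     @ arm_pre_atomic_barrier
        1:      ldrex   old_out, [mem]
                add     new_out, old_out, value
                strex   cond, new_out, [mem]
                cmp     cond, #0
                bne     1b
                dmb     ish             @ arm_post_atomic_barrier  */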
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
        emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
          pair = gen_lowpart (TImode, pair);
          emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
          emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
        }
    }
}
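
/* An illustrative summary: a single-vector permutation becomes one
   VTBL with the input as the table and SEL as the index vector, while
   a two-vector permutation first combines OP0 and OP1 into a register
   pair so that a multi-register table lookup (e.g. "vtbl.8 Dd,
   {Dn, Dn+1}, Dm") can index across both inputs.  */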
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
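
/* Worked example: for a single-vector V8QImode permutation, nelt is 8,
   so each selector byte is ANDed with 7 before the table lookup; an
   out-of-range index such as 11 then selects the same element as
   index 3, giving the wrap-around semantics VEC_PERM_EXPR requires.  */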
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
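
/* Worked example: on V8QImode with two distinct inputs, the
   even-elements pattern { 0, 2, 4, 6, 8, 10, 12, 14 } (odd == 0) and
   the odd-elements pattern { 1, 3, 5, 7, 9, 11, 13, 15 } (odd == 1)
   are the two selectors this matches.  */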
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
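
/* Worked example: on V8QImode with two distinct inputs, the low-half
   pattern is { 0, 8, 1, 9, 2, 10, 3, 11 } (high == 0) and the
   high-half pattern is { 4, 12, 5, 13, 6, 14, 7, 15 }
   (high == nelt / 2), i.e. lane i of each input interleaved.  */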
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev64v16qi; break;
        case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev32v16qi; break;
        case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev16v16qi; break;
        case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
        case V4SImode:  gen = gen_neon_vrev64v4si;  break;
        case V2SImode:  gen = gen_neon_vrev64v2si;  break;
        case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
        case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3 or 1, and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* ??? The third operand is an artifact of the builtin infrastructure
     and is ignored by the actual instruction.  */
  emit_insn (gen (d->target, d->op0, const0_rtx));
  return true;
}
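
/* Worked example: on V8QImode, diff == d->perm[0] selects the reversal
   granule: { 7, 6, 5, 4, 3, 2, 1, 0 } matches VREV64 (diff == 7),
   { 3, 2, 1, 0, 7, 6, 5, 4 } matches VREV32 (diff == 3), and
   { 1, 0, 3, 2, 5, 4, 7, 6 } matches VREV16 (diff == 1).  */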
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
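
/* Worked example: on V8QImode with two distinct inputs, the even-lanes
   pattern { 0, 8, 2, 10, 4, 12, 6, 14 } (odd == 0) and the odd-lanes
   pattern { 1, 9, 3, 11, 5, 13, 7, 15 } (odd == 1) are the selectors
   this matches.  */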
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        break;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
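
/* Worked example: on V8QImode with two inputs, the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } is matched with location == 3; the
   result is the eight consecutive bytes starting at byte 3 of the
   concatenation of op0 and op1, which is what VEXT #3 produces.  */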
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
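
/* Worked example: on V4SImode, sel = { 0, 5, 2, 7 } references both
   inputs, so which == 3 and one_vector_p stays false (unless op0 and
   op1 are the same register); sel = { 4, 5, 6, 7 } references only the
   second input (which == 2), so it is folded to { 0, 1, 2, 3 } applied
   to op1 alone.  */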
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we have either ldrd or a mode that fits in
     a single word, then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
         instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and a mode size greater than the word size,
         there is no point in auto-incrementing because ldm and stm
         will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }

  return false;
}
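
/* For example: without LDRD, a DImode access is rejected above for
   ARM_POST_DEC / ARM_PRE_INC, because the ldm/stm instructions that
   would implement the 8-byte move have no such addressing forms.  */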
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
                               rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
        in = the register pair containing the input value.
        out = the destination register pair.
        up = the high- or low-part of each pair.
        down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
              && (REG_P (out) || GET_CODE (out) == SUBREG)
              && GET_MODE (out) == DImode);
  gcc_assert (in
              && (REG_P (in) || GET_CODE (in) == SUBREG)
              && GET_MODE (in) == DImode);
  gcc_assert (amount
              && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
                   && GET_MODE (amount) == SImode)
                  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
              || (GET_CODE (scratch1) == SCRATCH)
              || (GET_MODE (scratch1) == SImode
                  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
              || (GET_CODE (scratch2) == SCRATCH)
              || (GET_MODE (scratch2) == SImode
                  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
              || !HARD_REGISTER_P (out)
              || (REGNO (out) != REGNO (amount)
                  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
            gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
            gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
            gen_addsi3_compare0 ((DEST), (SRC), \
                                 GEN_INT (-32))
  #define SET(DEST,SRC) \
            gen_rtx_SET (SImode, (DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
                            SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
                            SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
            gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
            gen_arm_cond_branch ((LABEL), \
                                 gen_rtx_ ## COND (CCmode, cc_reg, \
                                                   const0_rtx), \
                                 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result an ARM instruction in a
         shift-by-register would give.  This helps reduce execution
         differences between optimization levels, but it won't stop other
         parts of the compiler doing different things.  This is undefined
         behaviour, in any case.  */
      if (INTVAL (amount) <= 0)
        emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
        {
          if (code == ASHIFTRT)
            {
              rtx const31_rtx = GEN_INT (31);
              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
            }
          else
            emit_insn (gen_movdi (out, const0_rtx));
        }

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
        {
          /* Shifts by a constant less than 32.  */
          rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
          emit_insn (SET (out_down,
                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
                               out_down)));
          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
        }
      else
        {
          /* Shifts by a constant greater than 31.  */
          rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
          if (code == ASHIFTRT)
            emit_insn (gen_ashrsi3 (out_up, in_up,
                                    GEN_INT (31)));
          else
            emit_insn (SET (out_up, const0_rtx));
        }
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
         Swapping them around now allows the later code to be more general.  */
      switch (code)
        {
        case ASHIFT:
          emit_insn (SUB_32 (scratch1, amount));
          emit_insn (RSB_32 (scratch2, amount));
          break;
        case ASHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          /* Also set CC = amount > 32.  */
          emit_insn (SUB_S_32 (scratch2, amount));
          break;
        case LSHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          emit_insn (SUB_32 (scratch2, amount));
          break;
        default:
          gcc_unreachable ();
        }

      /* Emit code like this:

         ashift:
            out_down = in_down << amount;
            out_down = (in_up << (amount - 32)) | out_down;
            out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
            out_up = in_up << amount;

         ashiftrt:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount < 32)
              out_down = ((signed)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         lshiftrt:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount < 32)
              out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         The ARM and Thumb2 variants are the same but implemented slightly
         differently.  If this were only called during expand we could just
         use the Thumb2 case and let combine do the right thing, but this
         can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
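
/* An illustrative sketch in plain C of the constant case 0 < n < 32
   above, for a 64-bit left shift (code == ASHIFT, so "down" is the
   high word and "up" the low word):

        out_high = (in_high << n) | ((unsigned) in_low >> (32 - n));
        out_low  = in_low << n;

   LSHIFT shifts the "down" word, REV_LSHIFT recovers the bits that
   cross the 32-bit boundary, and SHIFT moves the "up" word.  */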
/* Return true if *COMPARISON is a valid comparison operation, and
   force its operands into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int) code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code) code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
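
/* With this offset, an address A is checked via the shadow byte at
   ((A >> 3) + (1 << 29)), i.e. the standard ASan shadow mapping with
   the shadow region based at 0x20000000.  */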
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((TREE_CODE (decl_op1) == VAR_DECL
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (TREE_CODE (decl_op0) == VAR_DECL
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}
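
/* For example (illustrative): a debug expression such as
   (minus (symbol_ref "a") (symbol_ref "b")) is only acceptable when
   "a" and "b" live in the same section, since only then is their
   difference a link-time constant that the debug format can encode.  */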
#include "gt-arm.h"