1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
50 #include "target-def.h"
52 #include "langhooks.h"
60 /* Forward definitions of types.  */
/* Shorthand typedefs for the minipool (constant-pool placement) data
   structures defined later in this file.  */
61 typedef struct minipool_node Mnode
;
62 typedef struct minipool_fixup Mfix
;
/* Optional hook for emitting language-specific object attributes;
   NOTE(review): appears to be set by a language front end -- confirm
   against the callers before relying on when it is non-NULL.  */
64 void (*arm_lang_output_object_attributes_hook
)(void);
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets
*arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
77 HOST_WIDE_INT
, rtx
, rtx
, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx
, int);
80 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
81 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
82 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
83 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
84 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
85 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
86 inline static int thumb1_index_register_rtx_p (rtx
, int);
87 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx
, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx
, int);
93 static void arm_print_operand_address (FILE *, rtx
);
94 static bool arm_print_operand_punct_valid_p (unsigned char code
);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
96 static arm_cc
get_arm_condition_code (rtx
);
97 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
98 static rtx
is_jump_table (rtx
);
99 static const char *output_multi_immediate (rtx
*, const char *, const char *,
101 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
102 static struct machine_function
*arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx
is_jump_table (rtx
);
105 static HOST_WIDE_INT
get_jump_table_size (rtx
);
106 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
107 static Mnode
*add_minipool_forward_ref (Mfix
*);
108 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
109 static Mnode
*add_minipool_backward_ref (Mfix
*);
110 static void assign_minipool_offsets (Mfix
*);
111 static void arm_print_value (FILE *, rtx
);
112 static void dump_minipool (rtx
);
113 static int arm_barrier_cost (rtx
);
114 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
115 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
116 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree
);
123 static unsigned long arm_compute_func_type (void);
124 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
125 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
126 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
132 static int arm_comp_type_attributes (const_tree
, const_tree
);
133 static void arm_set_default_type_attributes (tree
);
134 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
135 static int optimal_immediate_sequence (enum rtx_code code
,
136 unsigned HOST_WIDE_INT val
,
137 struct four_ints
*return_sequence
);
138 static int optimal_immediate_sequence_1 (enum rtx_code code
,
139 unsigned HOST_WIDE_INT val
,
140 struct four_ints
*return_sequence
,
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree
, tree
);
144 static enum machine_mode
arm_promote_function_mode (const_tree
,
145 enum machine_mode
, int *,
147 static bool arm_return_in_memory (const_tree
, const_tree
);
148 static rtx
arm_function_value (const_tree
, const_tree
, bool);
149 static rtx
arm_libcall_value_1 (enum machine_mode
);
150 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
157 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
158 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
159 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
160 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
163 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
164 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
165 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
166 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
167 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
171 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
172 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
173 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
174 static tree
arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond
, rtx pattern
);
176 static rtx
emit_set_insn (rtx
, rtx
);
177 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
179 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
181 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
183 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
184 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
186 static rtx
aapcs_libcall_value (enum machine_mode
);
187 static int aapcs_select_return_coproc (const_tree
, const_tree
);
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
191 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
194 static void arm_encode_section_info (tree
, rtx
, int);
197 static void arm_file_end (void);
198 static void arm_file_start (void);
200 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
202 static bool arm_pass_by_reference (cumulative_args_t
,
203 enum machine_mode
, const_tree
, bool);
204 static bool arm_promote_prototypes (const_tree
);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree
);
208 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
209 static bool arm_return_in_memory (const_tree
, const_tree
);
211 static void arm_unwind_emit (FILE *, rtx
);
212 static bool arm_output_ttype (rtx
);
213 static void arm_asm_emit_except_personality (rtx
);
214 static void arm_asm_init_sections (void);
216 static rtx
arm_dwarf_register_span (rtx
);
218 static tree
arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree
arm_get_cookie_size (tree
);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree
);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree
arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree
, rtx
);
230 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
/* Forward declarations for target-hook implementations defined later
   in this file.  */
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
233 static bool arm_cannot_copy_insn_p (rtx
);
234 static bool arm_tls_symbol_p (rtx x
);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
237 static bool arm_output_addr_const_extra (FILE *, rtx
);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree
);
240 static const char *arm_invalid_parameter_type (const_tree t
);
241 static const char *arm_invalid_return_type (const_tree t
);
242 static tree
arm_promoted_type (const_tree t
);
243 static tree
arm_convert_to_type (tree type
, tree expr
);
244 static bool arm_scalar_mode_supported_p (enum machine_mode
);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx
, tree
, rtx
);
249 static rtx
arm_trampoline_adjust_address (rtx
);
250 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
251 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
252 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
253 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode
,
255 unsigned HOST_WIDE_INT
);
256 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
257 static bool arm_class_likely_spilled_p (reg_class_t
);
258 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
259 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
264 static void arm_conditional_register_usage (void);
265 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
271 const unsigned char *sel
);
274 /* Table of machine attributes. */
275 static const struct attribute_spec arm_attribute_table
[] =
277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
278 affects_type_identity } */
279 /* Function calls made to this symbol must be done indirectly, because
280 it may lie outside of the 26 bit addressing range of a normal function
282 { "long_call", 0, 0, false, true, true, NULL
, false },
283 /* Whereas these functions are always known to reside within the 26 bit
285 { "short_call", 0, 0, false, true, true, NULL
, false },
286 /* Specify the procedure call conventions for a function. */
287 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
289 /* Interrupt Service Routines have special prologue and epilogue requirements. */
290 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
292 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
294 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
297 /* ARM/PE has three new attributes:
299 dllexport - for exporting a function/variable that will live in a dll
300 dllimport - for importing a function/variable from a dll
302 Microsoft allows multiple declspecs in one __declspec, separating
303 them with spaces. We do NOT support this. Instead, use __declspec
306 { "dllimport", 0, 0, true, false, false, NULL
, false },
307 { "dllexport", 0, 0, true, false, false, NULL
, false },
308 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
310 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
312 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
313 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
316 { NULL
, 0, 0, false, false, false, NULL
, false }
319 /* Initialize the GCC target structure. */
320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 #undef TARGET_MERGE_DECL_ATTRIBUTES
322 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
325 #undef TARGET_LEGITIMIZE_ADDRESS
326 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
328 #undef TARGET_ATTRIBUTE_TABLE
329 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
331 #undef TARGET_ASM_FILE_START
332 #define TARGET_ASM_FILE_START arm_file_start
333 #undef TARGET_ASM_FILE_END
334 #define TARGET_ASM_FILE_END arm_file_end
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP NULL
338 #undef TARGET_ASM_INTEGER
339 #define TARGET_ASM_INTEGER arm_assemble_integer
341 #undef TARGET_PRINT_OPERAND
342 #define TARGET_PRINT_OPERAND arm_print_operand
343 #undef TARGET_PRINT_OPERAND_ADDRESS
344 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
345 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
346 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
348 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
349 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
351 #undef TARGET_ASM_FUNCTION_PROLOGUE
352 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
354 #undef TARGET_ASM_FUNCTION_EPILOGUE
355 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE arm_option_override
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
369 #undef TARGET_REGISTER_MOVE_COST
370 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
372 #undef TARGET_MEMORY_MOVE_COST
373 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
375 #undef TARGET_ENCODE_SECTION_INFO
377 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
379 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
382 #undef TARGET_STRIP_NAME_ENCODING
383 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
385 #undef TARGET_ASM_INTERNAL_LABEL
386 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
388 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
389 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
391 #undef TARGET_FUNCTION_VALUE
392 #define TARGET_FUNCTION_VALUE arm_function_value
394 #undef TARGET_LIBCALL_VALUE
395 #define TARGET_LIBCALL_VALUE arm_libcall_value
397 #undef TARGET_FUNCTION_VALUE_REGNO_P
398 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
400 #undef TARGET_ASM_OUTPUT_MI_THUNK
401 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
405 #undef TARGET_RTX_COSTS
406 #define TARGET_RTX_COSTS arm_rtx_costs
407 #undef TARGET_ADDRESS_COST
408 #define TARGET_ADDRESS_COST arm_address_cost
410 #undef TARGET_SHIFT_TRUNCATION_MASK
411 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
413 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
414 #undef TARGET_ARRAY_MODE_SUPPORTED_P
415 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
416 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
417 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
418 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
419 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
420 arm_autovectorize_vector_sizes
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
425 #undef TARGET_INIT_BUILTINS
426 #define TARGET_INIT_BUILTINS arm_init_builtins
427 #undef TARGET_EXPAND_BUILTIN
428 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL arm_builtin_decl
432 #undef TARGET_INIT_LIBFUNCS
433 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
435 #undef TARGET_PROMOTE_FUNCTION_MODE
436 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
437 #undef TARGET_PROMOTE_PROTOTYPES
438 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
443 #undef TARGET_FUNCTION_ARG
444 #define TARGET_FUNCTION_ARG arm_function_arg
445 #undef TARGET_FUNCTION_ARG_ADVANCE
446 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
447 #undef TARGET_FUNCTION_ARG_BOUNDARY
448 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
450 #undef TARGET_SETUP_INCOMING_VARARGS
451 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
453 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
454 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
456 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
457 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
458 #undef TARGET_TRAMPOLINE_INIT
459 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
460 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
461 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
463 #undef TARGET_WARN_FUNC_RETURN
464 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
466 #undef TARGET_DEFAULT_SHORT_ENUMS
467 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
469 #undef TARGET_ALIGN_ANON_BITFIELD
470 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
472 #undef TARGET_NARROW_VOLATILE_BITFIELD
473 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
475 #undef TARGET_CXX_GUARD_TYPE
476 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
478 #undef TARGET_CXX_GUARD_MASK_BIT
479 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
481 #undef TARGET_CXX_GET_COOKIE_SIZE
482 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
484 #undef TARGET_CXX_COOKIE_HAS_SIZE
485 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
487 #undef TARGET_CXX_CDTOR_RETURNS_THIS
488 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
490 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
491 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
493 #undef TARGET_CXX_USE_AEABI_ATEXIT
494 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
496 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
497 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
498 arm_cxx_determine_class_data_visibility
500 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
501 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
503 #undef TARGET_RETURN_IN_MSB
504 #define TARGET_RETURN_IN_MSB arm_return_in_msb
506 #undef TARGET_RETURN_IN_MEMORY
507 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
509 #undef TARGET_MUST_PASS_IN_STACK
510 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
513 #undef TARGET_ASM_UNWIND_EMIT
514 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
516 /* EABI unwinding tables use a different format for the typeinfo tables. */
517 #undef TARGET_ASM_TTYPE
518 #define TARGET_ASM_TTYPE arm_output_ttype
520 #undef TARGET_ARM_EABI_UNWINDER
521 #define TARGET_ARM_EABI_UNWINDER true
523 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
524 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
526 #undef TARGET_ASM_INIT_SECTIONS
527 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
528 #endif /* ARM_UNWIND_INFO */
530 #undef TARGET_DWARF_REGISTER_SPAN
531 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
533 #undef TARGET_CANNOT_COPY_INSN_P
534 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
537 #undef TARGET_HAVE_TLS
538 #define TARGET_HAVE_TLS true
541 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
542 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
544 #undef TARGET_LEGITIMATE_CONSTANT_P
545 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
547 #undef TARGET_CANNOT_FORCE_CONST_MEM
548 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
550 #undef TARGET_MAX_ANCHOR_OFFSET
551 #define TARGET_MAX_ANCHOR_OFFSET 4095
553 /* The minimum is set such that the total size of the block
554 for a particular anchor is -4088 + 1 + 4095 bytes, which is
555 divisible by eight, ensuring natural spacing of anchors. */
556 #undef TARGET_MIN_ANCHOR_OFFSET
557 #define TARGET_MIN_ANCHOR_OFFSET -4088
559 #undef TARGET_SCHED_ISSUE_RATE
560 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
562 #undef TARGET_MANGLE_TYPE
563 #define TARGET_MANGLE_TYPE arm_mangle_type
565 #undef TARGET_BUILD_BUILTIN_VA_LIST
566 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
567 #undef TARGET_EXPAND_BUILTIN_VA_START
568 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
580 #undef TARGET_PREFERRED_RELOAD_CLASS
581 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
583 #undef TARGET_INVALID_PARAMETER_TYPE
584 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
586 #undef TARGET_INVALID_RETURN_TYPE
587 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
589 #undef TARGET_PROMOTED_TYPE
590 #define TARGET_PROMOTED_TYPE arm_promoted_type
592 #undef TARGET_CONVERT_TO_TYPE
593 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
596 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
598 #undef TARGET_FRAME_POINTER_REQUIRED
599 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
601 #undef TARGET_CAN_ELIMINATE
602 #define TARGET_CAN_ELIMINATE arm_can_eliminate
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
607 #undef TARGET_CLASS_LIKELY_SPILLED_P
608 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
610 #undef TARGET_VECTOR_ALIGNMENT
611 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
613 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
614 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
615 arm_vector_alignment_reachable
617 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
618 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
619 arm_builtin_support_vector_misalignment
621 #undef TARGET_PREFERRED_RENAME_CLASS
622 #define TARGET_PREFERRED_RENAME_CLASS \
623 arm_preferred_rename_class
625 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
626 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
627 arm_vectorize_vec_perm_const_ok
/* The single global target-hook vector for this backend.  Each slot is
   filled in by the TARGET_* macros #defined above via TARGET_INITIALIZER
   (see target-def.h, included earlier).  */
629 struct gcc_target targetm
= TARGET_INITIALIZER
;
631 /* Obstack for minipool constant handling.  */
632 static struct obstack minipool_obstack
;
/* Base of the minipool obstack; presumably used to free all minipool
   storage in one call after each function -- NOTE(review): confirm at
   the obstack_free call site.  */
633 static char * minipool_startobj
;
635 /* The maximum number of insns skipped which
636 will be conditionalised if possible.  */
637 static int max_insns_skipped
= 5;
639 extern FILE * asm_out_file
;
641 /* True if we are currently building a constant table.  */
642 int making_const_table
;
644 /* The processor for which instructions should be scheduled.  */
645 enum processor_type arm_tune
= arm_none
;
647 /* The current tuning set.  */
648 const struct tune_params
*current_tune
;
650 /* Which floating point hardware to schedule for.  */
653 /* Which floating point hardware to use.  */
654 const struct arm_fpu_desc
*arm_fpu_desc
;
656 /* Used for Thumb call_via trampolines.  */
657 rtx thumb_call_via_label
[14];
/* Nonzero once any thumb_call_via_label entry is in use -- NOTE(review):
   confirm where this is set before relying on the exact semantics.  */
658 static int thumb_call_reg_needed
;
660 /* Bit values used to identify processor capabilities. */
661 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
662 #define FL_ARCH3M (1 << 1) /* Extended multiply */
663 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
664 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
665 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
666 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
667 #define FL_THUMB (1 << 6) /* Thumb aware */
668 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
669 #define FL_STRONG (1 << 8) /* StrongARM */
670 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
671 #define FL_XSCALE (1 << 10) /* XScale */
672 /* spare (1 << 11) */
673 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
674 media instructions. */
675 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
676 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
677 Note: ARM6 & 7 derivatives only. */
678 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
679 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
680 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' profile.  */
682 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
683 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
684 #define FL_NEON (1 << 20) /* Neon instructions. */
685 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M architecture.  */
687 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
688 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
689 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
691 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
692 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
694 /* Flags that only affect tuning, not available instructions. */
695 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
698 #define FL_FOR_ARCH2 FL_NOTM
699 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
700 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
701 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
702 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
703 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
704 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
705 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
706 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
707 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
708 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
709 #define FL_FOR_ARCH6J FL_FOR_ARCH6
710 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
711 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
712 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
713 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
714 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
715 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
716 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
717 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
718 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
719 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
720 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
721 | FL_ARM_DIV | FL_NOTM)
723 /* The bits in this mask specify which
724 instructions we are allowed to generate.  */
725 static unsigned long insn_flags
= 0;
727 /* The bits in this mask specify which instruction scheduling options should
   be used.  */
729 static unsigned long tune_flags
= 0;
731 /* The highest ARM architecture version supported by the
   target.  */
733 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
735 /* The following are used in the arm.md file as equivalents to bits
736 in the above two flag variables. */
738 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
741 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
744 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
747 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
750 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
753 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
756 /* Nonzero if this chip supports the ARM 6K extensions. */
759 /* Nonzero if instructions present in ARMv6-M can be used. */
762 /* Nonzero if this chip supports the ARM 7 extensions. */
765 /* Nonzero if instructions not present in the 'M' profile can be used.  */
766 int arm_arch_notm
= 0;
768 /* Nonzero if instructions present in ARMv7E-M can be used.  */
771 /* Nonzero if instructions present in ARMv8 can be used.  */
774 /* Nonzero if this chip can benefit from load scheduling.  */
775 int arm_ld_sched
= 0;
777 /* Nonzero if this chip is a StrongARM.  */
778 int arm_tune_strongarm
= 0;
780 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
781 int arm_arch_iwmmxt
= 0;
783 /* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
784 int arm_arch_iwmmxt2
= 0;
786 /* Nonzero if this chip is an XScale.  */
787 int arm_arch_xscale
= 0;
789 /* Nonzero if tuning for XScale.  */
790 int arm_tune_xscale
= 0;
792 /* Nonzero if we want to tune for stores that access the write-buffer.
793 This typically means an ARM6 or ARM7 with MMU or MPU.  */
794 int arm_tune_wbuf
= 0;
796 /* Nonzero if tuning for Cortex-A9.  */
797 int arm_tune_cortex_a9
= 0;
799 /* Nonzero if generating Thumb instructions.  */
802 /* Nonzero if generating Thumb-1 instructions.  */
805 /* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
807 XXX This is a bit of a hack, it's intended to help work around
808 problems in GLD which doesn't understand that armv5t code is
809 interworking clean.  */
810 int arm_cpp_interwork
= 0;
812 /* Nonzero if chip supports Thumb 2.  */
815 /* Nonzero if chip supports integer division instruction.  */
/* arm_arch_arm_hwdiv covers ARM-mode divide and arm_arch_thumb_hwdiv
   Thumb-mode divide (cf. FL_ARM_DIV / FL_THUMB_DIV above).  */
816 int arm_arch_arm_hwdiv
;
817 int arm_arch_thumb_hwdiv
;
819 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
820 we must report the mode of the memory reference from
821 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
822 enum machine_mode output_memory_reference_mode
;
824 /* The register number to be used for the PIC offset register.  */
825 unsigned arm_pic_register
= INVALID_REGNUM
;
827 /* Set to 1 after arm_reorg has started. Reset to start at the start of
828 the next function.  */
829 static int after_arm_reorg
= 0;
/* The procedure calling standard in effect -- presumably initialised
   during option processing; NOTE(review): confirm the setter.  */
831 enum arm_pcs arm_pcs_default
;
833 /* For an explanation of these variables, see final_prescan_insn below.  */
835 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
836 enum arm_cond_code arm_current_cc
;
839 int arm_target_label
;
840 /* The number of conditionally executed insns, including the current insn.  */
841 int arm_condexec_count
= 0;
842 /* A bitmask specifying the patterns for the IT block.
843 Zero means do not output an IT block before this insn.  */
844 int arm_condexec_mask
= 0;
845 /* The number of bits used in arm_condexec_mask.  */
846 int arm_condexec_masklen
= 0;
848 /* The condition codes of the ARM, and the inverse function. */
849 static const char * const arm_condition_codes
[] =
851 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
852 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
855 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
856 int arm_regs_in_sequence
[] =
858 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
861 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
862 #define streq(string1, string2) (strcmp (string1, string2) == 0)
864 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
865 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
866 | (1 << PIC_OFFSET_TABLE_REGNUM)))
868 /* Initialization code. */
872 const char *const name
;
873 enum processor_type core
;
875 enum base_architecture base_arch
;
876 const unsigned long flags
;
877 const struct tune_params
*const tune
;
/* Prefetch tuning triples: number of prefetch slots, L1 cache size (kB)
   and L1 cache line size (bytes); -1 means "not beneficial / unknown".
   These expand to the three consecutive prefetch fields of
   struct tune_params.  */
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
887 const struct tune_params arm_slowmul_tune
=
889 arm_slowmul_rtx_costs
,
891 3, /* Constant limit. */
892 5, /* Max cond insns. */
893 ARM_PREFETCH_NOT_BENEFICIAL
,
894 true, /* Prefer constant pool. */
895 arm_default_branch_cost
,
896 false /* Prefer LDRD/STRD. */
899 const struct tune_params arm_fastmul_tune
=
901 arm_fastmul_rtx_costs
,
903 1, /* Constant limit. */
904 5, /* Max cond insns. */
905 ARM_PREFETCH_NOT_BENEFICIAL
,
906 true, /* Prefer constant pool. */
907 arm_default_branch_cost
,
908 false /* Prefer LDRD/STRD. */
911 /* StrongARM has early execution of branches, so a sequence that is worth
912 skipping is shorter. Set max_insns_skipped to a lower value. */
914 const struct tune_params arm_strongarm_tune
=
916 arm_fastmul_rtx_costs
,
918 1, /* Constant limit. */
919 3, /* Max cond insns. */
920 ARM_PREFETCH_NOT_BENEFICIAL
,
921 true, /* Prefer constant pool. */
922 arm_default_branch_cost
,
923 false /* Prefer LDRD/STRD. */
926 const struct tune_params arm_xscale_tune
=
928 arm_xscale_rtx_costs
,
929 xscale_sched_adjust_cost
,
930 2, /* Constant limit. */
931 3, /* Max cond insns. */
932 ARM_PREFETCH_NOT_BENEFICIAL
,
933 true, /* Prefer constant pool. */
934 arm_default_branch_cost
,
935 false /* Prefer LDRD/STRD. */
938 const struct tune_params arm_9e_tune
=
942 1, /* Constant limit. */
943 5, /* Max cond insns. */
944 ARM_PREFETCH_NOT_BENEFICIAL
,
945 true, /* Prefer constant pool. */
946 arm_default_branch_cost
,
947 false /* Prefer LDRD/STRD. */
950 const struct tune_params arm_v6t2_tune
=
954 1, /* Constant limit. */
955 5, /* Max cond insns. */
956 ARM_PREFETCH_NOT_BENEFICIAL
,
957 false, /* Prefer constant pool. */
958 arm_default_branch_cost
,
959 false /* Prefer LDRD/STRD. */
962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
963 const struct tune_params arm_cortex_tune
=
967 1, /* Constant limit. */
968 5, /* Max cond insns. */
969 ARM_PREFETCH_NOT_BENEFICIAL
,
970 false, /* Prefer constant pool. */
971 arm_default_branch_cost
,
972 false /* Prefer LDRD/STRD. */
975 const struct tune_params arm_cortex_a15_tune
=
979 1, /* Constant limit. */
980 5, /* Max cond insns. */
981 ARM_PREFETCH_NOT_BENEFICIAL
,
982 false, /* Prefer constant pool. */
983 arm_default_branch_cost
,
984 true /* Prefer LDRD/STRD. */
987 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
988 less appealing. Set max_insns_skipped to a low value. */
990 const struct tune_params arm_cortex_a5_tune
=
994 1, /* Constant limit. */
995 1, /* Max cond insns. */
996 ARM_PREFETCH_NOT_BENEFICIAL
,
997 false, /* Prefer constant pool. */
998 arm_cortex_a5_branch_cost
,
999 false /* Prefer LDRD/STRD. */
1002 const struct tune_params arm_cortex_a9_tune
=
1005 cortex_a9_sched_adjust_cost
,
1006 1, /* Constant limit. */
1007 5, /* Max cond insns. */
1008 ARM_PREFETCH_BENEFICIAL(4,32,32),
1009 false, /* Prefer constant pool. */
1010 arm_default_branch_cost
,
1011 false /* Prefer LDRD/STRD. */
1014 const struct tune_params arm_fa726te_tune
=
1017 fa726te_sched_adjust_cost
,
1018 1, /* Constant limit. */
1019 5, /* Max cond insns. */
1020 ARM_PREFETCH_NOT_BENEFICIAL
,
1021 true, /* Prefer constant pool. */
1022 arm_default_branch_cost
,
1023 false /* Prefer LDRD/STRD. */
1027 /* Not all of these give usefully different compilation alternatives,
1028 but there is no simple way of generalizing them. */
1029 static const struct processors all_cores
[] =
1032 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1033 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1034 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1035 #include "arm-cores.def"
1037 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1040 static const struct processors all_architectures
[] =
1042 /* ARM Architectures */
1043 /* We don't specify tuning costs here as it will be figured out
1046 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1047 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1048 #include "arm-arches.def"
1050 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1054 /* These are populated as commandline arguments are processed, or NULL
1055 if not specified. */
1056 static const struct processors
*arm_selected_arch
;
1057 static const struct processors
*arm_selected_cpu
;
1058 static const struct processors
*arm_selected_tune
;
1060 /* The name of the preprocessor macro to define for this architecture. */
1062 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
1064 /* Available values for -mfpu=. */
1066 static const struct arm_fpu_desc all_fpus
[] =
1068 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1069 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1070 #include "arm-fpus.def"
/* Supported TLS relocations.  */

/* NOTE(review): reconstructed from a garbled extraction — only the
   TLS_DESCSEQ enumerator was visible; confirm the full list against
   the upstream file.  */
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
1086 /* The maximum number of insns to be used when loading a constant. */
1088 arm_constant_limit (bool size_p
)
1090 return size_p
? 1 : current_tune
->constant_limit
;
1093 /* Emit an insn that's a simple single-set. Both the operands must be known
1096 emit_set_insn (rtx x
, rtx y
)
1098 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE (Kernighan's popcount:
   each iteration clears the lowest set bit, so the loop runs once
   per set bit).  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
1118 enum machine_mode mode
;
1120 } arm_fixed_mode_set
;
1122 /* A small helper for setting fixed-point library libfuncs. */
1125 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1126 const char *funcname
, const char *modename
,
1131 if (num_suffix
== 0)
1132 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
1134 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
1136 set_optab_libfunc (optable
, mode
, buffer
);
1140 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
1141 enum machine_mode from
, const char *funcname
,
1142 const char *toname
, const char *fromname
)
1145 const char *maybe_suffix_2
= "";
1147 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1148 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
1149 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
1150 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
1151 maybe_suffix_2
= "2";
1153 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
1156 set_conv_libfunc (optable
, to
, from
, buffer
);
1159 /* Set up library functions unique to ARM. */
1162 arm_init_libfuncs (void)
1164 /* For Linux, we have access to kernel support for atomic operations. */
1165 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
1166 init_sync_libfuncs (2 * UNITS_PER_WORD
);
1168 /* There are no special library functions unless we are using the
1173 /* The functions below are described in Section 4 of the "Run-Time
1174 ABI for the ARM architecture", Version 1.0. */
1176 /* Double-precision floating-point arithmetic. Table 2. */
1177 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1178 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1179 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1180 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1181 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1183 /* Double-precision comparisons. Table 3. */
1184 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1185 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1186 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1187 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1188 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1189 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1190 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1192 /* Single-precision floating-point arithmetic. Table 4. */
1193 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1194 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1195 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1196 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1197 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1199 /* Single-precision comparisons. Table 5. */
1200 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1201 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1202 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1203 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1204 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1205 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1206 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1208 /* Floating-point to integer conversions. Table 6. */
1209 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1210 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1211 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1212 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1213 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1214 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1215 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1216 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1218 /* Conversions between floating types. Table 7. */
1219 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1220 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1222 /* Integer to floating-point conversions. Table 8. */
1223 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1224 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1225 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1226 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1227 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1228 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1229 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1230 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1232 /* Long long. Table 9. */
1233 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1234 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1235 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1236 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1237 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1238 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1239 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1240 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1242 /* Integer (32/32->32) division. \S 4.3.1. */
1243 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1244 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1246 /* The divmod functions are designed so that they can be used for
1247 plain division, even though they return both the quotient and the
1248 remainder. The quotient is returned in the usual location (i.e.,
1249 r0 for SImode, {r0, r1} for DImode), just as would be expected
1250 for an ordinary division routine. Because the AAPCS calling
1251 conventions specify that all of { r0, r1, r2, r3 } are
1252 callee-saved registers, there is no need to tell the compiler
1253 explicitly that those registers are clobbered by these
1255 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1256 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1258 /* For SImode division the ABI provides div-without-mod routines,
1259 which are faster. */
1260 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1261 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1263 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1264 divmod libcalls instead. */
1265 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1266 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1267 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1268 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1270 /* Half-precision float operations. The compiler handles all operations
1271 with NULL libfuncs by converting the SFmode. */
1272 switch (arm_fp16_format
)
1274 case ARM_FP16_FORMAT_IEEE
:
1275 case ARM_FP16_FORMAT_ALTERNATIVE
:
1278 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1279 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1281 : "__gnu_f2h_alternative"));
1282 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
1283 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1285 : "__gnu_h2f_alternative"));
1288 set_optab_libfunc (add_optab
, HFmode
, NULL
);
1289 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
1290 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
1291 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
1292 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
1295 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
1296 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
1297 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
1298 set_optab_libfunc (le_optab
, HFmode
, NULL
);
1299 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
1300 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
1301 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
1308 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1310 const arm_fixed_mode_set fixed_arith_modes
[] =
1331 const arm_fixed_mode_set fixed_conv_modes
[] =
1361 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
1363 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
1364 "add", fixed_arith_modes
[i
].name
, 3);
1365 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
1366 "ssadd", fixed_arith_modes
[i
].name
, 3);
1367 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
1368 "usadd", fixed_arith_modes
[i
].name
, 3);
1369 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
1370 "sub", fixed_arith_modes
[i
].name
, 3);
1371 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
1372 "sssub", fixed_arith_modes
[i
].name
, 3);
1373 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
1374 "ussub", fixed_arith_modes
[i
].name
, 3);
1375 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
1376 "mul", fixed_arith_modes
[i
].name
, 3);
1377 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
1378 "ssmul", fixed_arith_modes
[i
].name
, 3);
1379 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
1380 "usmul", fixed_arith_modes
[i
].name
, 3);
1381 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
1382 "div", fixed_arith_modes
[i
].name
, 3);
1383 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
1384 "udiv", fixed_arith_modes
[i
].name
, 3);
1385 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
1386 "ssdiv", fixed_arith_modes
[i
].name
, 3);
1387 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
1388 "usdiv", fixed_arith_modes
[i
].name
, 3);
1389 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
1390 "neg", fixed_arith_modes
[i
].name
, 2);
1391 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
1392 "ssneg", fixed_arith_modes
[i
].name
, 2);
1393 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
1394 "usneg", fixed_arith_modes
[i
].name
, 2);
1395 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
1396 "ashl", fixed_arith_modes
[i
].name
, 3);
1397 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
1398 "ashr", fixed_arith_modes
[i
].name
, 3);
1399 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
1400 "lshr", fixed_arith_modes
[i
].name
, 3);
1401 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
1402 "ssashl", fixed_arith_modes
[i
].name
, 3);
1403 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
1404 "usashl", fixed_arith_modes
[i
].name
, 3);
1405 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
1406 "cmp", fixed_arith_modes
[i
].name
, 2);
1409 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
1410 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
1413 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
1414 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
1417 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
1418 fixed_conv_modes
[j
].mode
, "fract",
1419 fixed_conv_modes
[i
].name
,
1420 fixed_conv_modes
[j
].name
);
1421 arm_set_fixed_conv_libfunc (satfract_optab
,
1422 fixed_conv_modes
[i
].mode
,
1423 fixed_conv_modes
[j
].mode
, "satfract",
1424 fixed_conv_modes
[i
].name
,
1425 fixed_conv_modes
[j
].name
);
1426 arm_set_fixed_conv_libfunc (fractuns_optab
,
1427 fixed_conv_modes
[i
].mode
,
1428 fixed_conv_modes
[j
].mode
, "fractuns",
1429 fixed_conv_modes
[i
].name
,
1430 fixed_conv_modes
[j
].name
);
1431 arm_set_fixed_conv_libfunc (satfractuns_optab
,
1432 fixed_conv_modes
[i
].mode
,
1433 fixed_conv_modes
[j
].mode
, "satfractuns",
1434 fixed_conv_modes
[i
].name
,
1435 fixed_conv_modes
[j
].name
);
1439 if (TARGET_AAPCS_BASED
)
1440 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
1443 /* On AAPCS systems, this is the "struct __va_list". */
1444 static GTY(()) tree va_list_type
;
1446 /* Return the type to use as __builtin_va_list. */
1448 arm_build_builtin_va_list (void)
1453 if (!TARGET_AAPCS_BASED
)
1454 return std_build_builtin_va_list ();
1456 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1464 The C Library ABI further reinforces this definition in \S
1467 We must follow this definition exactly. The structure tag
1468 name is visible in C++ mangled names, and thus forms a part
1469 of the ABI. The field name may be used by people who
1470 #include <stdarg.h>. */
1471 /* Create the type. */
1472 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1473 /* Give it the required name. */
1474 va_list_name
= build_decl (BUILTINS_LOCATION
,
1476 get_identifier ("__va_list"),
1478 DECL_ARTIFICIAL (va_list_name
) = 1;
1479 TYPE_NAME (va_list_type
) = va_list_name
;
1480 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
1481 /* Create the __ap field. */
1482 ap_field
= build_decl (BUILTINS_LOCATION
,
1484 get_identifier ("__ap"),
1486 DECL_ARTIFICIAL (ap_field
) = 1;
1487 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
1488 TYPE_FIELDS (va_list_type
) = ap_field
;
1489 /* Compute its layout. */
1490 layout_type (va_list_type
);
1492 return va_list_type
;
1495 /* Return an expression of type "void *" pointing to the next
1496 available argument in a variable-argument list. VALIST is the
1497 user-level va_list object, of type __builtin_va_list. */
1499 arm_extract_valist_ptr (tree valist
)
1501 if (TREE_TYPE (valist
) == error_mark_node
)
1502 return error_mark_node
;
1504 /* On an AAPCS target, the pointer is stored within "struct
1506 if (TARGET_AAPCS_BASED
)
1508 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
1509 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
1510 valist
, ap_field
, NULL_TREE
);
1516 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1518 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
1520 valist
= arm_extract_valist_ptr (valist
);
1521 std_expand_builtin_va_start (valist
, nextarg
);
1524 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1526 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
1529 valist
= arm_extract_valist_ptr (valist
);
1530 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
1533 /* Fix up any incompatible options that the user has specified. */
1535 arm_option_override (void)
1537 if (global_options_set
.x_arm_arch_option
)
1538 arm_selected_arch
= &all_architectures
[arm_arch_option
];
1540 if (global_options_set
.x_arm_cpu_option
)
1541 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
1543 if (global_options_set
.x_arm_tune_option
)
1544 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
1546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1547 SUBTARGET_OVERRIDE_OPTIONS
;
1550 if (arm_selected_arch
)
1552 if (arm_selected_cpu
)
1554 /* Check for conflict between mcpu and march. */
1555 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
1557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1558 arm_selected_cpu
->name
, arm_selected_arch
->name
);
1559 /* -march wins for code generation.
1560 -mcpu wins for default tuning. */
1561 if (!arm_selected_tune
)
1562 arm_selected_tune
= arm_selected_cpu
;
1564 arm_selected_cpu
= arm_selected_arch
;
1568 arm_selected_arch
= NULL
;
1571 /* Pick a CPU based on the architecture. */
1572 arm_selected_cpu
= arm_selected_arch
;
1575 /* If the user did not specify a processor, choose one for them. */
1576 if (!arm_selected_cpu
)
1578 const struct processors
* sel
;
1579 unsigned int sought
;
1581 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
1582 if (!arm_selected_cpu
->name
)
1584 #ifdef SUBTARGET_CPU_DEFAULT
1585 /* Use the subtarget default CPU if none was specified by
1587 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
1589 /* Default to ARM6. */
1590 if (!arm_selected_cpu
->name
)
1591 arm_selected_cpu
= &all_cores
[arm6
];
1594 sel
= arm_selected_cpu
;
1595 insn_flags
= sel
->flags
;
1597 /* Now check to see if the user has specified some command line
1598 switch that require certain abilities from the cpu. */
1601 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1603 sought
|= (FL_THUMB
| FL_MODE32
);
1605 /* There are no ARM processors that support both APCS-26 and
1606 interworking. Therefore we force FL_MODE26 to be removed
1607 from insn_flags here (if it was set), so that the search
1608 below will always be able to find a compatible processor. */
1609 insn_flags
&= ~FL_MODE26
;
1612 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1614 /* Try to locate a CPU type that supports all of the abilities
1615 of the default CPU, plus the extra abilities requested by
1617 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1618 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1621 if (sel
->name
== NULL
)
1623 unsigned current_bit_count
= 0;
1624 const struct processors
* best_fit
= NULL
;
1626 /* Ideally we would like to issue an error message here
1627 saying that it was not possible to find a CPU compatible
1628 with the default CPU, but which also supports the command
1629 line options specified by the programmer, and so they
1630 ought to use the -mcpu=<name> command line option to
1631 override the default CPU type.
1633 If we cannot find a cpu that has both the
1634 characteristics of the default cpu and the given
1635 command line options we scan the array again looking
1636 for a best match. */
1637 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1638 if ((sel
->flags
& sought
) == sought
)
1642 count
= bit_count (sel
->flags
& insn_flags
);
1644 if (count
>= current_bit_count
)
1647 current_bit_count
= count
;
1651 gcc_assert (best_fit
);
1655 arm_selected_cpu
= sel
;
1659 gcc_assert (arm_selected_cpu
);
1660 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1661 if (!arm_selected_tune
)
1662 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
1664 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1665 insn_flags
= arm_selected_cpu
->flags
;
1666 arm_base_arch
= arm_selected_cpu
->base_arch
;
1668 arm_tune
= arm_selected_tune
->core
;
1669 tune_flags
= arm_selected_tune
->flags
;
1670 current_tune
= arm_selected_tune
->tune
;
1672 /* Make sure that the processor choice does not conflict with any of the
1673 other command line choices. */
1674 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
1675 error ("target CPU does not support ARM mode");
1677 /* BPABI targets use linker tricks to allow interworking on cores
1678 without thumb support. */
1679 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
1681 warning (0, "target CPU does not support interworking" );
1682 target_flags
&= ~MASK_INTERWORK
;
1685 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1687 warning (0, "target CPU does not support THUMB instructions");
1688 target_flags
&= ~MASK_THUMB
;
1691 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1694 target_flags
&= ~MASK_APCS_FRAME
;
1697 /* Callee super interworking implies thumb interworking. Adding
1698 this to the flags here simplifies the logic elsewhere. */
1699 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1700 target_flags
|= MASK_INTERWORK
;
1702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1703 from here where no function is being compiled currently. */
1704 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1707 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1710 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1713 target_flags
|= MASK_APCS_FRAME
;
1716 if (TARGET_POKE_FUNCTION_NAME
)
1717 target_flags
|= MASK_APCS_FRAME
;
1719 if (TARGET_APCS_REENT
&& flag_pic
)
1720 error ("-fpic and -mapcs-reent are incompatible");
1722 if (TARGET_APCS_REENT
)
1723 warning (0, "APCS reentrant code not supported. Ignored");
1725 /* If this target is normally configured to use APCS frames, warn if they
1726 are turned off and debugging is turned on. */
1728 && write_symbols
!= NO_DEBUG
1729 && !TARGET_APCS_FRAME
1730 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1733 if (TARGET_APCS_FLOAT
)
1734 warning (0, "passing floating point arguments in fp regs not yet supported");
1736 if (TARGET_LITTLE_WORDS
)
1737 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
1738 "will be removed in a future release");
1740 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1741 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
1742 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
1743 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
1744 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
1745 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
1746 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
1747 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
1748 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
1749 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
1750 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
1751 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
1752 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
1753 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
1754 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
1756 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
1757 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
1758 thumb_code
= TARGET_ARM
== 0;
1759 thumb1_code
= TARGET_THUMB1
!= 0;
1760 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
1761 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
1762 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
1763 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
1764 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
1765 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
1766 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
1768 /* If we are not using the default (ARM mode) section anchor offset
1769 ranges, then set the correct ranges now. */
1772 /* Thumb-1 LDR instructions cannot have negative offsets.
1773 Permissible positive offset ranges are 5-bit (for byte loads),
1774 6-bit (for halfword loads), or 7-bit (for word loads).
1775 Empirical results suggest a 7-bit anchor range gives the best
1776 overall code size. */
1777 targetm
.min_anchor_offset
= 0;
1778 targetm
.max_anchor_offset
= 127;
1780 else if (TARGET_THUMB2
)
1782 /* The minimum is set such that the total size of the block
1783 for a particular anchor is 248 + 1 + 4095 bytes, which is
1784 divisible by eight, ensuring natural spacing of anchors. */
1785 targetm
.min_anchor_offset
= -248;
1786 targetm
.max_anchor_offset
= 4095;
1789 /* V5 code we generate is completely interworking capable, so we turn off
1790 TARGET_INTERWORK here to avoid many tests later on. */
1792 /* XXX However, we must pass the right pre-processor defines to CPP
1793 or GLD can get confused. This is a hack. */
1794 if (TARGET_INTERWORK
)
1795 arm_cpp_interwork
= 1;
1798 target_flags
&= ~MASK_INTERWORK
;
1800 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
1801 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1803 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
1804 error ("iwmmxt abi requires an iwmmxt capable cpu");
1806 if (!global_options_set
.x_arm_fpu_index
)
1808 const char *target_fpu_name
;
1811 #ifdef FPUTYPE_DEFAULT
1812 target_fpu_name
= FPUTYPE_DEFAULT
;
1814 target_fpu_name
= "vfp";
1817 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
1822 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
1824 switch (arm_fpu_desc
->model
)
1826 case ARM_FP_MODEL_VFP
:
1827 arm_fpu_attr
= FPU_VFP
;
1834 if (TARGET_AAPCS_BASED
)
1836 if (TARGET_CALLER_INTERWORKING
)
1837 error ("AAPCS does not support -mcaller-super-interworking");
1839 if (TARGET_CALLEE_INTERWORKING
)
1840 error ("AAPCS does not support -mcallee-super-interworking");
1843 /* iWMMXt and NEON are incompatible. */
1844 if (TARGET_IWMMXT
&& TARGET_NEON
)
1845 error ("iWMMXt and NEON are incompatible");
1847 /* iWMMXt unsupported under Thumb mode. */
1848 if (TARGET_THUMB
&& TARGET_IWMMXT
)
1849 error ("iWMMXt unsupported under Thumb mode");
1851 /* __fp16 support currently assumes the core has ldrh. */
1852 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
1853 sorry ("__fp16 and no ldrh");
1855 /* If soft-float is specified then don't use FPU. */
1856 if (TARGET_SOFT_FLOAT
)
1857 arm_fpu_attr
= FPU_NONE
;
1859 if (TARGET_AAPCS_BASED
)
1861 if (arm_abi
== ARM_ABI_IWMMXT
)
1862 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
1863 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
1864 && TARGET_HARD_FLOAT
1866 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
1868 arm_pcs_default
= ARM_PCS_AAPCS
;
1872 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
1873 sorry ("-mfloat-abi=hard and VFP");
1875 if (arm_abi
== ARM_ABI_APCS
)
1876 arm_pcs_default
= ARM_PCS_APCS
;
1878 arm_pcs_default
= ARM_PCS_ATPCS
;
1881 /* For arm2/3 there is no need to do any scheduling if we are doing
1882 software floating-point. */
1883 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
1884 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
1886 /* Use the cp15 method if it is available. */
1887 if (target_thread_pointer
== TP_AUTO
)
1889 if (arm_arch6k
&& !TARGET_THUMB1
)
1890 target_thread_pointer
= TP_CP15
;
1892 target_thread_pointer
= TP_SOFT
;
1895 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
1896 error ("can not use -mtp=cp15 with 16-bit Thumb");
1898 /* Override the default structure alignment for AAPCS ABI. */
1899 if (!global_options_set
.x_arm_structure_size_boundary
)
1901 if (TARGET_AAPCS_BASED
)
1902 arm_structure_size_boundary
= 8;
1906 if (arm_structure_size_boundary
!= 8
1907 && arm_structure_size_boundary
!= 32
1908 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
1910 if (ARM_DOUBLEWORD_ALIGN
)
1912 "structure size boundary can only be set to 8, 32 or 64");
1914 warning (0, "structure size boundary can only be set to 8 or 32");
1915 arm_structure_size_boundary
1916 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
1920 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
1922 error ("RTP PIC is incompatible with Thumb");
1926 /* If stack checking is disabled, we can use r10 as the PIC register,
1927 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1928 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
1930 if (TARGET_VXWORKS_RTP
)
1931 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1932 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
1935 if (flag_pic
&& TARGET_VXWORKS_RTP
)
1936 arm_pic_register
= 9;
1938 if (arm_pic_register_string
!= NULL
)
1940 int pic_register
= decode_reg_name (arm_pic_register_string
);
1943 warning (0, "-mpic-register= is useless without -fpic");
1945 /* Prevent the user from choosing an obviously stupid PIC register. */
1946 else if (pic_register
< 0 || call_used_regs
[pic_register
]
1947 || pic_register
== HARD_FRAME_POINTER_REGNUM
1948 || pic_register
== STACK_POINTER_REGNUM
1949 || pic_register
>= PC_REGNUM
1950 || (TARGET_VXWORKS_RTP
1951 && (unsigned int) pic_register
!= arm_pic_register
))
1952 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
1954 arm_pic_register
= pic_register
;
1957 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1958 if (fix_cm3_ldrd
== 2)
1960 if (arm_selected_cpu
->core
== cortexm3
)
1966 /* Enable -munaligned-access by default for
1967 - all ARMv6 architecture-based processors
1968 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1969 - ARMv8 architecture-base processors.
1971 Disable -munaligned-access by default for
1972 - all pre-ARMv6 architecture-based processors
1973 - ARMv6-M architecture-based processors. */
1975 if (unaligned_access
== 2)
1977 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
1978 unaligned_access
= 1;
1980 unaligned_access
= 0;
1982 else if (unaligned_access
== 1
1983 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
1985 warning (0, "target CPU does not support unaligned accesses");
1986 unaligned_access
= 0;
1989 if (TARGET_THUMB1
&& flag_schedule_insns
)
1991 /* Don't warn since it's on by default in -O2. */
1992 flag_schedule_insns
= 0;
1997 /* If optimizing for size, bump the number of instructions that we
1998 are prepared to conditionally execute (even on a StrongARM). */
1999 max_insns_skipped
= 6;
2002 max_insns_skipped
= current_tune
->max_insns_skipped
;
2004 /* Hot/Cold partitioning is not currently supported, since we can't
2005 handle literal pool placement in that case. */
2006 if (flag_reorder_blocks_and_partition
)
2008 inform (input_location
,
2009 "-freorder-blocks-and-partition not supported on this architecture");
2010 flag_reorder_blocks_and_partition
= 0;
2011 flag_reorder_blocks
= 1;
2015 /* Hoisting PIC address calculations more aggressively provides a small,
2016 but measurable, size reduction for PIC code. Therefore, we decrease
2017 the bar for unrestricted expression hoisting to the cost of PIC address
2018 calculation, which is 2 instructions. */
2019 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2020 global_options
.x_param_values
,
2021 global_options_set
.x_param_values
);
2023 /* ARM EABI defaults to strict volatile bitfields. */
2024 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2025 && abi_version_at_least(2))
2026 flag_strict_volatile_bitfields
= 1;
2028 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2029 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2030 if (flag_prefetch_loop_arrays
< 0
2033 && current_tune
->num_prefetch_slots
> 0)
2034 flag_prefetch_loop_arrays
= 1;
2036 /* Set up parameters to be used in prefetching algorithm. Do not override the
2037 defaults unless we are tuning for a core we have researched values for. */
2038 if (current_tune
->num_prefetch_slots
> 0)
2039 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2040 current_tune
->num_prefetch_slots
,
2041 global_options
.x_param_values
,
2042 global_options_set
.x_param_values
);
2043 if (current_tune
->l1_cache_line_size
>= 0)
2044 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2045 current_tune
->l1_cache_line_size
,
2046 global_options
.x_param_values
,
2047 global_options_set
.x_param_values
);
2048 if (current_tune
->l1_cache_size
>= 0)
2049 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2050 current_tune
->l1_cache_size
,
2051 global_options
.x_param_values
,
2052 global_options_set
.x_param_values
);
2054 /* Use the alternative scheduling-pressure algorithm by default. */
2055 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
2056 global_options
.x_param_values
,
2057 global_options_set
.x_param_values
);
2059 /* Register global variables with the garbage collector. */
2060 arm_add_gc_roots ();
2064 arm_add_gc_roots (void)
2066 gcc_obstack_init(&minipool_obstack
);
2067 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2070 /* A table of known ARM exception types.
2071 For use with the interrupt function attribute. */
2075 const char *const arg
;
2076 const unsigned long return_value
;
2080 static const isr_attribute_arg isr_attribute_args
[] =
2082 { "IRQ", ARM_FT_ISR
},
2083 { "irq", ARM_FT_ISR
},
2084 { "FIQ", ARM_FT_FIQ
},
2085 { "fiq", ARM_FT_FIQ
},
2086 { "ABORT", ARM_FT_ISR
},
2087 { "abort", ARM_FT_ISR
},
2088 { "ABORT", ARM_FT_ISR
},
2089 { "abort", ARM_FT_ISR
},
2090 { "UNDEF", ARM_FT_EXCEPTION
},
2091 { "undef", ARM_FT_EXCEPTION
},
2092 { "SWI", ARM_FT_EXCEPTION
},
2093 { "swi", ARM_FT_EXCEPTION
},
2094 { NULL
, ARM_FT_NORMAL
}
2097 /* Returns the (interrupt) function type of the current
2098 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2100 static unsigned long
2101 arm_isr_value (tree argument
)
2103 const isr_attribute_arg
* ptr
;
2107 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
2109 /* No argument - default to IRQ. */
2110 if (argument
== NULL_TREE
)
2113 /* Get the value of the argument. */
2114 if (TREE_VALUE (argument
) == NULL_TREE
2115 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2116 return ARM_FT_UNKNOWN
;
2118 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2120 /* Check it against the list of known arguments. */
2121 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2122 if (streq (arg
, ptr
->arg
))
2123 return ptr
->return_value
;
2125 /* An unrecognized interrupt type. */
2126 return ARM_FT_UNKNOWN
;
2129 /* Computes the type of the current function. */
2131 static unsigned long
2132 arm_compute_func_type (void)
2134 unsigned long type
= ARM_FT_UNKNOWN
;
2138 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2140 /* Decide if the current function is volatile. Such functions
2141 never return, and many memory cycles can be saved by not storing
2142 register values that will never be needed again. This optimization
2143 was added to speed up context switching in a kernel application. */
2145 && (TREE_NOTHROW (current_function_decl
)
2146 || !(flag_unwind_tables
2148 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
2149 && TREE_THIS_VOLATILE (current_function_decl
))
2150 type
|= ARM_FT_VOLATILE
;
2152 if (cfun
->static_chain_decl
!= NULL
)
2153 type
|= ARM_FT_NESTED
;
2155 attr
= DECL_ATTRIBUTES (current_function_decl
);
2157 a
= lookup_attribute ("naked", attr
);
2159 type
|= ARM_FT_NAKED
;
2161 a
= lookup_attribute ("isr", attr
);
2163 a
= lookup_attribute ("interrupt", attr
);
2166 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2168 type
|= arm_isr_value (TREE_VALUE (a
));
2173 /* Returns the type of the current function. */
2176 arm_current_func_type (void)
2178 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2179 cfun
->machine
->func_type
= arm_compute_func_type ();
2181 return cfun
->machine
->func_type
;
2185 arm_allocate_stack_slots_for_args (void)
2187 /* Naked functions should not allocate stack slots for arguments. */
2188 return !IS_NAKED (arm_current_func_type ());
2192 arm_warn_func_return (tree decl
)
2194 /* Naked functions are implemented entirely in assembly, including the
2195 return sequence, so suppress warnings about this. */
2196 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
2200 /* Output assembler code for a block containing the constant parts
2201 of a trampoline, leaving space for the variable parts.
2203 On the ARM, (if r8 is the static chain regnum, and remembering that
2204 referencing pc adds an offset of 8) the trampoline looks like:
2207 .word static chain value
2208 .word function's address
2209 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2212 arm_asm_trampoline_template (FILE *f
)
2216 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2217 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2219 else if (TARGET_THUMB2
)
2221 /* The Thumb-2 trampoline is similar to the arm implementation.
2222 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2223 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2224 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2225 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2229 ASM_OUTPUT_ALIGN (f
, 2);
2230 fprintf (f
, "\t.code\t16\n");
2231 fprintf (f
, ".Ltrampoline_start:\n");
2232 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
2233 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2234 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
2235 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2236 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
2237 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
2239 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2240 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2243 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2246 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
2248 rtx fnaddr
, mem
, a_tramp
;
2250 emit_block_move (m_tramp
, assemble_trampoline_template (),
2251 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
2253 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
2254 emit_move_insn (mem
, chain_value
);
2256 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
2257 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
2258 emit_move_insn (mem
, fnaddr
);
2260 a_tramp
= XEXP (m_tramp
, 0);
2261 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
2262 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
2263 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
2266 /* Thumb trampolines should be entered in thumb mode, so set
2267 the bottom bit of the address. */
2270 arm_trampoline_adjust_address (rtx addr
)
2273 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
2274 NULL
, 0, OPTAB_LIB_WIDEN
);
2278 /* Return 1 if it is possible to return using a single instruction.
2279 If SIBLING is non-null, this is a test for a return before a sibling
2280 call. SIBLING is the call insn, so we can examine its register usage. */
2283 use_return_insn (int iscond
, rtx sibling
)
2286 unsigned int func_type
;
2287 unsigned long saved_int_regs
;
2288 unsigned HOST_WIDE_INT stack_adjust
;
2289 arm_stack_offsets
*offsets
;
2291 /* Never use a return instruction before reload has run. */
2292 if (!reload_completed
)
2295 func_type
= arm_current_func_type ();
2297 /* Naked, volatile and stack alignment functions need special
2299 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
2302 /* So do interrupt functions that use the frame pointer and Thumb
2303 interrupt functions. */
2304 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
2307 offsets
= arm_get_frame_offsets ();
2308 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
2310 /* As do variadic functions. */
2311 if (crtl
->args
.pretend_args_size
2312 || cfun
->machine
->uses_anonymous_args
2313 /* Or if the function calls __builtin_eh_return () */
2314 || crtl
->calls_eh_return
2315 /* Or if the function calls alloca */
2316 || cfun
->calls_alloca
2317 /* Or if there is a stack adjustment. However, if the stack pointer
2318 is saved on the stack, we can use a pre-incrementing stack load. */
2319 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
2320 && stack_adjust
== 4)))
2323 saved_int_regs
= offsets
->saved_regs_mask
;
2325 /* Unfortunately, the insn
2327 ldmib sp, {..., sp, ...}
2329 triggers a bug on most SA-110 based devices, such that the stack
2330 pointer won't be correctly restored if the instruction takes a
2331 page fault. We work around this problem by popping r3 along with
2332 the other registers, since that is never slower than executing
2333 another instruction.
2335 We test for !arm_arch5 here, because code for any architecture
2336 less than this could potentially be run on one of the buggy
2338 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
2340 /* Validate that r3 is a call-clobbered register (always true in
2341 the default abi) ... */
2342 if (!call_used_regs
[3])
2345 /* ... that it isn't being used for a return value ... */
2346 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
2349 /* ... or for a tail-call argument ... */
2352 gcc_assert (CALL_P (sibling
));
2354 if (find_regno_fusage (sibling
, USE
, 3))
2358 /* ... and that there are no call-saved registers in r0-r2
2359 (always true in the default ABI). */
2360 if (saved_int_regs
& 0x7)
2364 /* Can't be done if interworking with Thumb, and any registers have been
2366 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
2369 /* On StrongARM, conditional returns are expensive if they aren't
2370 taken and multiple registers have been stacked. */
2371 if (iscond
&& arm_tune_strongarm
)
2373 /* Conditional return when just the LR is stored is a simple
2374 conditional-load instruction, that's not expensive. */
2375 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
2379 && arm_pic_register
!= INVALID_REGNUM
2380 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
2384 /* If there are saved registers but the LR isn't saved, then we need
2385 two instructions for the return. */
2386 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
2389 /* Can't be done if any of the VFP regs are pushed,
2390 since this also requires an insn. */
2391 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
2392 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
2393 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2396 if (TARGET_REALLY_IWMMXT
)
2397 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
2398 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2404 /* Return TRUE if int I is a valid immediate ARM constant. */
2407 const_ok_for_arm (HOST_WIDE_INT i
)
2411 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2412 be all zero, or all one. */
2413 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
2414 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
2415 != ((~(unsigned HOST_WIDE_INT
) 0)
2416 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
2419 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
2421 /* Fast return for 0 and small values. We must do this for zero, since
2422 the code below can't handle that one case. */
2423 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
2426 /* Get the number of trailing zeros. */
2427 lowbit
= ffs((int) i
) - 1;
2429 /* Only even shifts are allowed in ARM mode so round down to the
2430 nearest even number. */
2434 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
2439 /* Allow rotated constants in ARM mode. */
2441 && ((i
& ~0xc000003f) == 0
2442 || (i
& ~0xf000000f) == 0
2443 || (i
& ~0xfc000003) == 0))
2450 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2453 if (i
== v
|| i
== (v
| (v
<< 8)))
2456 /* Allow repeated pattern 0xXY00XY00. */
2466 /* Return true if I is a valid constant for the operation CODE. */
/* NOTE(review): this block is a garbled extraction — statements are split
   across lines and the entire switch/case scaffolding (the SET, PLUS,
   comparison-code, MINUS/XOR, IOR and AND case labels plus braces and
   returns) has been dropped.  Only comments were added here; restore the
   control structure from the upstream file before compiling.  */
2468 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
/* If the constant is directly encodable, any operation can use it.  */
2470 if (const_ok_for_arm (i
))
/* SET case: presumably followed by a movw check then an mvn fallback —
   the case label itself is missing from this extraction.  */
2476 /* See if we can use movw. */
2477 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
2480 /* Otherwise, try mvn. */
2481 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
/* PLUS case: addw/subw take a 12-bit immediate on Thumb-2.  */
2484 /* See if we can use addw or subw. */
2486 && ((i
& 0xfffff000) == 0
2487 || ((-i
) & 0xfffff000) == 0))
2489 /* else fall through. */
/* NOTE(review): the comparison-code case labels (original lines
   2490-2508) are missing here; the negated-constant check below
   belongs to them.  */
2509 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
2511 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
2517 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2521 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2528 /* Return true if I is a valid di mode constant for the operation CODE. */
/* NOTE(review): garbled extraction — the function's opening brace, the
   switch over CODE and all case labels/returns other than the PLUS arm
   (original lines 2536-2548) are missing.  Only comments were added;
   restore the dropped structure from the upstream file.  */
2530 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
/* Split the 64-bit value into its two 32-bit halves.  */
2532 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
2533 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
2534 rtx hi
= GEN_INT (hi_val
);
2535 rtx lo
= GEN_INT (lo_val
);
/* PLUS arm of the missing switch: both halves must be addable/negatable
   as SImode operands.  */
2543 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
2550 /* Emit a sequence of insns to handle a large constant.
2551 CODE is the code of the operation required, it can be any of SET, PLUS,
2552 IOR, AND, XOR, MINUS;
2553 MODE is the mode in which the operation is being performed;
2554 VAL is the integer to operate on;
2555 SOURCE is the other operand (a register, or a null-pointer for SET);
2556 SUBTARGETS means it is safe to create scratch registers if that will
2557 either produce a simpler sequence, or we will want to cse the values.
2558 Return value is the number of insns emitted. */
2560 /* ??? Tweak this for thumb2. */
2562 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2563 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2567 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2568 cond
= COND_EXEC_TEST (PATTERN (insn
));
2572 if (subtargets
|| code
== SET
2573 || (REG_P (target
) && REG_P (source
)
2574 && REGNO (target
) != REGNO (source
)))
2576 /* After arm_reorg has been called, we can't fix up expensive
2577 constants by pushing them into memory so we must synthesize
2578 them in-line, regardless of the cost. This is only likely to
2579 be more costly on chips that have load delay slots and we are
2580 compiling without running the scheduler (so no splitting
2581 occurred before the final instruction emission).
2583 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2585 if (!after_arm_reorg
2587 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2589 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2594 /* Currently SET is the only monadic value for CODE, all
2595 the rest are diadic. */
2596 if (TARGET_USE_MOVT
)
2597 arm_emit_movpair (target
, GEN_INT (val
));
2599 emit_set_insn (target
, GEN_INT (val
));
2605 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2607 if (TARGET_USE_MOVT
)
2608 arm_emit_movpair (temp
, GEN_INT (val
));
2610 emit_set_insn (temp
, GEN_INT (val
));
2612 /* For MINUS, the value is subtracted from, since we never
2613 have subtraction of a constant. */
2615 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2617 emit_set_insn (target
,
2618 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2624 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
2628 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2629 ARM/THUMB2 immediates, and add up to VAL.
2630 Thr function return value gives the number of insns required. */
2632 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
2633 struct four_ints
*return_sequence
)
2635 int best_consecutive_zeros
= 0;
2639 struct four_ints tmp_sequence
;
2641 /* If we aren't targeting ARM, the best place to start is always at
2642 the bottom, otherwise look more closely. */
2645 for (i
= 0; i
< 32; i
+= 2)
2647 int consecutive_zeros
= 0;
2649 if (!(val
& (3 << i
)))
2651 while ((i
< 32) && !(val
& (3 << i
)))
2653 consecutive_zeros
+= 2;
2656 if (consecutive_zeros
> best_consecutive_zeros
)
2658 best_consecutive_zeros
= consecutive_zeros
;
2659 best_start
= i
- consecutive_zeros
;
2666 /* So long as it won't require any more insns to do so, it's
2667 desirable to emit a small constant (in bits 0...9) in the last
2668 insn. This way there is more chance that it can be combined with
2669 a later addressing insn to form a pre-indexed load or store
2670 operation. Consider:
2672 *((volatile int *)0xe0000100) = 1;
2673 *((volatile int *)0xe0000110) = 2;
2675 We want this to wind up as:
2679 str rB, [rA, #0x100]
2681 str rB, [rA, #0x110]
2683 rather than having to synthesize both large constants from scratch.
2685 Therefore, we calculate how many insns would be required to emit
2686 the constant starting from `best_start', and also starting from
2687 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2688 yield a shorter sequence, we may as well use zero. */
2689 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
2691 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
2693 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
2694 if (insns2
<= insns1
)
2696 *return_sequence
= tmp_sequence
;
2704 /* As for optimal_immediate_sequence, but starting at bit-position I. */
/* NOTE(review): garbled extraction — the enclosing do/while (or loop)
   skeleton, several declarations/assignments (e.g. the insn counter and
   `end`/`loc` handling, original lines 2710-2711, 2730-2736, 2743-2757)
   and the function tail after the SET/MINUS test are missing.  Only
   comments were added here; restore the dropped structure from the
   upstream file before compiling.  */
2706 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
2707 struct four_ints
*return_sequence
, int i
)
2709 int remainder
= val
& 0xffffffff;
2712 /* Try and find a way of doing the job in either two or three
2715 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2716 location. We start at position I. This may be the MSB, or
2717 optimial_immediate_sequence may have positioned it at the largest block
2718 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2719 wrapping around to the top of the word when we drop off the bottom.
2720 In the worst case this code should produce no more than four insns.
2722 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2723 constants, shifted to any arbitrary location. We should always start
2728 unsigned int b1
, b2
, b3
, b4
;
2729 unsigned HOST_WIDE_INT result
;
2732 gcc_assert (insns
< 4);
2737 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2738 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
2741 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
2742 /* We can use addw/subw for the last 12 bits. */
2746 /* Use an 8-bit shifted/rotated immediate. */
2750 result
= remainder
& ((0x0ff << end
)
2751 | ((i
< end
) ? (0xff >> (32 - end
))
2758 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2759 arbitrary shifts. */
2760 i
-= TARGET_ARM
? 2 : 1;
2764 /* Next, see if we can do a better job with a thumb2 replicated
2767 We do it this way around to catch the cases like 0x01F001E0 where
2768 two 8-bit immediates would work, but a replicated constant would
2771 TODO: 16-bit constants that don't clear all the bits, but still win.
2772 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
/* Split the remaining value into its four bytes for the replicated
   constant analysis below.  */
2775 b1
= (remainder
& 0xff000000) >> 24;
2776 b2
= (remainder
& 0x00ff0000) >> 16;
2777 b3
= (remainder
& 0x0000ff00) >> 8;
2778 b4
= remainder
& 0xff;
2782 /* The 8-bit immediate already found clears b1 (and maybe b2),
2783 but must leave b3 and b4 alone. */
2785 /* First try to find a 32-bit replicated constant that clears
2786 almost everything. We can assume that we can't do it in one,
2787 or else we wouldn't be here. */
2788 unsigned int tmp
= b1
& b2
& b3
& b4
;
2789 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
2791 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
2792 + (tmp
== b3
) + (tmp
== b4
);
2794 && (matching_bytes
>= 3
2795 || (matching_bytes
== 2
2796 && const_ok_for_op (remainder
& ~tmp2
, code
))))
2798 /* At least 3 of the bytes match, and the fourth has at
2799 least as many bits set, or two of the bytes match
2800 and it will only require one more insn to finish. */
2808 /* Second, try to find a 16-bit replicated constant that can
2809 leave three of the bytes clear. If b2 or b4 is already
2810 zero, then we can. If the 8-bit from above would not
2811 clear b2 anyway, then we still win. */
2812 else if (b1
== b3
&& (!b2
|| !b4
2813 || (remainder
& 0x00ff0000 & ~result
)))
2815 result
= remainder
& 0xff00ff00;
2821 /* The 8-bit immediate already found clears b2 (and maybe b3)
2822 and we don't get here unless b1 is alredy clear, but it will
2823 leave b4 unchanged. */
2825 /* If we can clear b2 and b4 at once, then we win, since the
2826 8-bits couldn't possibly reach that far. */
2829 result
= remainder
& 0x00ff00ff;
/* Record the chosen immediate and clear those bits from the value
   still to be synthesized.  */
2835 return_sequence
->i
[insns
++] = result
;
2836 remainder
&= ~result
;
2838 if (code
== SET
|| code
== MINUS
)
2846 /* Emit an instruction with the indicated PATTERN. If COND is
2847 non-NULL, conditionalize the execution of the instruction on COND
2851 emit_constant_insn (rtx cond
, rtx pattern
)
2854 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
2855 emit_insn (pattern
);
2858 /* As above, but extra parameter GENERATE which, if clear, suppresses
2862 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2863 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2868 int final_invert
= 0;
2870 int set_sign_bit_copies
= 0;
2871 int clear_sign_bit_copies
= 0;
2872 int clear_zero_bit_copies
= 0;
2873 int set_zero_bit_copies
= 0;
2874 int insns
= 0, neg_insns
, inv_insns
;
2875 unsigned HOST_WIDE_INT temp1
, temp2
;
2876 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2877 struct four_ints
*immediates
;
2878 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
2880 /* Find out which operations are safe for a given CODE. Also do a quick
2881 check for degenerate cases; these can occur when DImode operations
2894 if (remainder
== 0xffffffff)
2897 emit_constant_insn (cond
,
2898 gen_rtx_SET (VOIDmode
, target
,
2899 GEN_INT (ARM_SIGN_EXTEND (val
))));
2905 if (reload_completed
&& rtx_equal_p (target
, source
))
2909 emit_constant_insn (cond
,
2910 gen_rtx_SET (VOIDmode
, target
, source
));
2919 emit_constant_insn (cond
,
2920 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2923 if (remainder
== 0xffffffff)
2925 if (reload_completed
&& rtx_equal_p (target
, source
))
2928 emit_constant_insn (cond
,
2929 gen_rtx_SET (VOIDmode
, target
, source
));
2938 if (reload_completed
&& rtx_equal_p (target
, source
))
2941 emit_constant_insn (cond
,
2942 gen_rtx_SET (VOIDmode
, target
, source
));
2946 if (remainder
== 0xffffffff)
2949 emit_constant_insn (cond
,
2950 gen_rtx_SET (VOIDmode
, target
,
2951 gen_rtx_NOT (mode
, source
)));
2958 /* We treat MINUS as (val - source), since (source - val) is always
2959 passed as (source + (-val)). */
2963 emit_constant_insn (cond
,
2964 gen_rtx_SET (VOIDmode
, target
,
2965 gen_rtx_NEG (mode
, source
)));
2968 if (const_ok_for_arm (val
))
2971 emit_constant_insn (cond
,
2972 gen_rtx_SET (VOIDmode
, target
,
2973 gen_rtx_MINUS (mode
, GEN_INT (val
),
2984 /* If we can do it in one insn get out quickly. */
2985 if (const_ok_for_op (val
, code
))
2988 emit_constant_insn (cond
,
2989 gen_rtx_SET (VOIDmode
, target
,
2991 ? gen_rtx_fmt_ee (code
, mode
, source
,
2997 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
2999 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3000 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3004 if (mode
== SImode
&& i
== 16)
3005 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3007 emit_constant_insn (cond
,
3008 gen_zero_extendhisi2
3009 (target
, gen_lowpart (HImode
, source
)));
3011 /* Extz only supports SImode, but we can coerce the operands
3013 emit_constant_insn (cond
,
3014 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3015 gen_lowpart (SImode
, source
),
3016 GEN_INT (i
), const0_rtx
));
3022 /* Calculate a few attributes that may be useful for specific
3024 /* Count number of leading zeros. */
3025 for (i
= 31; i
>= 0; i
--)
3027 if ((remainder
& (1 << i
)) == 0)
3028 clear_sign_bit_copies
++;
3033 /* Count number of leading 1's. */
3034 for (i
= 31; i
>= 0; i
--)
3036 if ((remainder
& (1 << i
)) != 0)
3037 set_sign_bit_copies
++;
3042 /* Count number of trailing zero's. */
3043 for (i
= 0; i
<= 31; i
++)
3045 if ((remainder
& (1 << i
)) == 0)
3046 clear_zero_bit_copies
++;
3051 /* Count number of trailing 1's. */
3052 for (i
= 0; i
<= 31; i
++)
3054 if ((remainder
& (1 << i
)) != 0)
3055 set_zero_bit_copies
++;
3063 /* See if we can do this by sign_extending a constant that is known
3064 to be negative. This is a good, way of doing it, since the shift
3065 may well merge into a subsequent insn. */
3066 if (set_sign_bit_copies
> 1)
3068 if (const_ok_for_arm
3069 (temp1
= ARM_SIGN_EXTEND (remainder
3070 << (set_sign_bit_copies
- 1))))
3074 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3075 emit_constant_insn (cond
,
3076 gen_rtx_SET (VOIDmode
, new_src
,
3078 emit_constant_insn (cond
,
3079 gen_ashrsi3 (target
, new_src
,
3080 GEN_INT (set_sign_bit_copies
- 1)));
3084 /* For an inverted constant, we will need to set the low bits,
3085 these will be shifted out of harm's way. */
3086 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
3087 if (const_ok_for_arm (~temp1
))
3091 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3092 emit_constant_insn (cond
,
3093 gen_rtx_SET (VOIDmode
, new_src
,
3095 emit_constant_insn (cond
,
3096 gen_ashrsi3 (target
, new_src
,
3097 GEN_INT (set_sign_bit_copies
- 1)));
3103 /* See if we can calculate the value as the difference between two
3104 valid immediates. */
3105 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
3107 int topshift
= clear_sign_bit_copies
& ~1;
3109 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
3110 & (0xff000000 >> topshift
));
3112 /* If temp1 is zero, then that means the 9 most significant
3113 bits of remainder were 1 and we've caused it to overflow.
3114 When topshift is 0 we don't need to do anything since we
3115 can borrow from 'bit 32'. */
3116 if (temp1
== 0 && topshift
!= 0)
3117 temp1
= 0x80000000 >> (topshift
- 1);
3119 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
3121 if (const_ok_for_arm (temp2
))
3125 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3126 emit_constant_insn (cond
,
3127 gen_rtx_SET (VOIDmode
, new_src
,
3129 emit_constant_insn (cond
,
3130 gen_addsi3 (target
, new_src
,
3138 /* See if we can generate this by setting the bottom (or the top)
3139 16 bits, and then shifting these into the other half of the
3140 word. We only look for the simplest cases, to do more would cost
3141 too much. Be careful, however, not to generate this when the
3142 alternative would take fewer insns. */
3143 if (val
& 0xffff0000)
3145 temp1
= remainder
& 0xffff0000;
3146 temp2
= remainder
& 0x0000ffff;
3148 /* Overlaps outside this range are best done using other methods. */
3149 for (i
= 9; i
< 24; i
++)
3151 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
3152 && !const_ok_for_arm (temp2
))
3154 rtx new_src
= (subtargets
3155 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3157 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
3158 source
, subtargets
, generate
);
3166 gen_rtx_ASHIFT (mode
, source
,
3173 /* Don't duplicate cases already considered. */
3174 for (i
= 17; i
< 24; i
++)
3176 if (((temp1
| (temp1
>> i
)) == remainder
)
3177 && !const_ok_for_arm (temp1
))
3179 rtx new_src
= (subtargets
3180 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3182 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
3183 source
, subtargets
, generate
);
3188 gen_rtx_SET (VOIDmode
, target
,
3191 gen_rtx_LSHIFTRT (mode
, source
,
3202 /* If we have IOR or XOR, and the constant can be loaded in a
3203 single instruction, and we can find a temporary to put it in,
3204 then this can be done in two instructions instead of 3-4. */
3206 /* TARGET can't be NULL if SUBTARGETS is 0 */
3207 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
3209 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
3213 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3215 emit_constant_insn (cond
,
3216 gen_rtx_SET (VOIDmode
, sub
,
3218 emit_constant_insn (cond
,
3219 gen_rtx_SET (VOIDmode
, target
,
3220 gen_rtx_fmt_ee (code
, mode
,
3231 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3232 and the remainder 0s for e.g. 0xfff00000)
3233 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3235 This can be done in 2 instructions by using shifts with mov or mvn.
3240 mvn r0, r0, lsr #12 */
3241 if (set_sign_bit_copies
> 8
3242 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
3246 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3247 rtx shift
= GEN_INT (set_sign_bit_copies
);
3251 gen_rtx_SET (VOIDmode
, sub
,
3253 gen_rtx_ASHIFT (mode
,
3258 gen_rtx_SET (VOIDmode
, target
,
3260 gen_rtx_LSHIFTRT (mode
, sub
,
3267 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3269 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3271 For eg. r0 = r0 | 0xfff
3276 if (set_zero_bit_copies
> 8
3277 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
3281 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3282 rtx shift
= GEN_INT (set_zero_bit_copies
);
3286 gen_rtx_SET (VOIDmode
, sub
,
3288 gen_rtx_LSHIFTRT (mode
,
3293 gen_rtx_SET (VOIDmode
, target
,
3295 gen_rtx_ASHIFT (mode
, sub
,
3301 /* This will never be reached for Thumb2 because orn is a valid
3302 instruction. This is for Thumb1 and the ARM 32 bit cases.
3304 x = y | constant (such that ~constant is a valid constant)
3306 x = ~(~y & ~constant).
3308 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
3312 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3313 emit_constant_insn (cond
,
3314 gen_rtx_SET (VOIDmode
, sub
,
3315 gen_rtx_NOT (mode
, source
)));
3318 sub
= gen_reg_rtx (mode
);
3319 emit_constant_insn (cond
,
3320 gen_rtx_SET (VOIDmode
, sub
,
3321 gen_rtx_AND (mode
, source
,
3323 emit_constant_insn (cond
,
3324 gen_rtx_SET (VOIDmode
, target
,
3325 gen_rtx_NOT (mode
, sub
)));
3332 /* See if two shifts will do 2 or more insn's worth of work. */
3333 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
3335 HOST_WIDE_INT shift_mask
= ((0xffffffff
3336 << (32 - clear_sign_bit_copies
))
3339 if ((remainder
| shift_mask
) != 0xffffffff)
3343 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3344 insns
= arm_gen_constant (AND
, mode
, cond
,
3345 remainder
| shift_mask
,
3346 new_src
, source
, subtargets
, 1);
3351 rtx targ
= subtargets
? NULL_RTX
: target
;
3352 insns
= arm_gen_constant (AND
, mode
, cond
,
3353 remainder
| shift_mask
,
3354 targ
, source
, subtargets
, 0);
3360 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3361 rtx shift
= GEN_INT (clear_sign_bit_copies
);
3363 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
3364 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
3370 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
3372 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
3374 if ((remainder
| shift_mask
) != 0xffffffff)
3378 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3380 insns
= arm_gen_constant (AND
, mode
, cond
,
3381 remainder
| shift_mask
,
3382 new_src
, source
, subtargets
, 1);
3387 rtx targ
= subtargets
? NULL_RTX
: target
;
3389 insns
= arm_gen_constant (AND
, mode
, cond
,
3390 remainder
| shift_mask
,
3391 targ
, source
, subtargets
, 0);
3397 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3398 rtx shift
= GEN_INT (clear_zero_bit_copies
);
3400 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
3401 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
3413 /* Calculate what the instruction sequences would be if we generated it
3414 normally, negated, or inverted. */
3416 /* AND cannot be split into multiple insns, so invert and use BIC. */
3419 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
3422 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
3427 if (can_invert
|| final_invert
)
3428 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
3433 immediates
= &pos_immediates
;
3435 /* Is the negated immediate sequence more efficient? */
3436 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
3439 immediates
= &neg_immediates
;
3444 /* Is the inverted immediate sequence more efficient?
3445 We must allow for an extra NOT instruction for XOR operations, although
3446 there is some chance that the final 'mvn' will get optimized later. */
3447 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
3450 immediates
= &inv_immediates
;
3458 /* Now output the chosen sequence as instructions. */
3461 for (i
= 0; i
< insns
; i
++)
3463 rtx new_src
, temp1_rtx
;
3465 temp1
= immediates
->i
[i
];
3467 if (code
== SET
|| code
== MINUS
)
3468 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3469 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
3470 new_src
= gen_reg_rtx (mode
);
3476 else if (can_negate
)
3479 temp1
= trunc_int_for_mode (temp1
, mode
);
3480 temp1_rtx
= GEN_INT (temp1
);
3484 else if (code
== MINUS
)
3485 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3487 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3489 emit_constant_insn (cond
,
3490 gen_rtx_SET (VOIDmode
, new_src
,
3496 can_negate
= can_invert
;
3500 else if (code
== MINUS
)
3508 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3509 gen_rtx_NOT (mode
, source
)));
3516 /* Canonicalize a comparison so that we are more likely to recognize it.
3517 This can be done for a few constant compares, where we can make the
3518 immediate value easier to load. */
3521 arm_canonicalize_comparison (enum rtx_code code
, rtx
*op0
, rtx
*op1
)
3523 enum machine_mode mode
;
3524 unsigned HOST_WIDE_INT i
, maxval
;
3526 mode
= GET_MODE (*op0
);
3527 if (mode
== VOIDmode
)
3528 mode
= GET_MODE (*op1
);
3530 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3532 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3533 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3534 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3535 for GTU/LEU in Thumb mode. */
3540 if (code
== GT
|| code
== LE
3541 || (!TARGET_ARM
&& (code
== GTU
|| code
== LEU
)))
3543 /* Missing comparison. First try to use an available
3545 if (CONST_INT_P (*op1
))
3553 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3555 *op1
= GEN_INT (i
+ 1);
3556 return code
== GT
? GE
: LT
;
3561 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3562 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3564 *op1
= GEN_INT (i
+ 1);
3565 return code
== GTU
? GEU
: LTU
;
3573 /* If that did not work, reverse the condition. */
3577 return swap_condition (code
);
3583 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3584 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3585 to facilitate possible combining with a cmp into 'ands'. */
3587 && GET_CODE (*op0
) == ZERO_EXTEND
3588 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
3589 && GET_MODE (XEXP (*op0
, 0)) == QImode
3590 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
3591 && subreg_lowpart_p (XEXP (*op0
, 0))
3592 && *op1
== const0_rtx
)
3593 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
3596 /* Comparisons smaller than DImode. Only adjust comparisons against
3597 an out-of-range constant. */
3598 if (!CONST_INT_P (*op1
)
3599 || const_ok_for_arm (INTVAL (*op1
))
3600 || const_ok_for_arm (- INTVAL (*op1
)))
3614 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3616 *op1
= GEN_INT (i
+ 1);
3617 return code
== GT
? GE
: LT
;
3624 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3626 *op1
= GEN_INT (i
- 1);
3627 return code
== GE
? GT
: LE
;
3633 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3634 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3636 *op1
= GEN_INT (i
+ 1);
3637 return code
== GTU
? GEU
: LTU
;
3644 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3646 *op1
= GEN_INT (i
- 1);
3647 return code
== GEU
? GTU
: LEU
;
3659 /* Define how to find the value returned by a function. */
3662 arm_function_value(const_tree type
, const_tree func
,
3663 bool outgoing ATTRIBUTE_UNUSED
)
3665 enum machine_mode mode
;
3666 int unsignedp ATTRIBUTE_UNUSED
;
3667 rtx r ATTRIBUTE_UNUSED
;
3669 mode
= TYPE_MODE (type
);
3671 if (TARGET_AAPCS_BASED
)
3672 return aapcs_allocate_return_reg (mode
, type
, func
);
3674 /* Promote integer types. */
3675 if (INTEGRAL_TYPE_P (type
))
3676 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
3678 /* Promotes small structs returned in a register to full-word size
3679 for big-endian AAPCS. */
3680 if (arm_return_in_msb (type
))
3682 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3683 if (size
% UNITS_PER_WORD
!= 0)
3685 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
3686 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
3690 return arm_libcall_value_1 (mode
);
3694 libcall_eq (const void *p1
, const void *p2
)
3696 return rtx_equal_p ((const_rtx
) p1
, (const_rtx
) p2
);
3700 libcall_hash (const void *p1
)
3702 return hash_rtx ((const_rtx
) p1
, VOIDmode
, NULL
, NULL
, FALSE
);
3706 add_libcall (htab_t htab
, rtx libcall
)
3708 *htab_find_slot (htab
, libcall
, INSERT
) = libcall
;
3712 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3714 static bool init_done
= false;
3715 static htab_t libcall_htab
;
3721 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3723 add_libcall (libcall_htab
,
3724 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3725 add_libcall (libcall_htab
,
3726 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3727 add_libcall (libcall_htab
,
3728 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3729 add_libcall (libcall_htab
,
3730 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3732 add_libcall (libcall_htab
,
3733 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3734 add_libcall (libcall_htab
,
3735 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3736 add_libcall (libcall_htab
,
3737 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3738 add_libcall (libcall_htab
,
3739 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3741 add_libcall (libcall_htab
,
3742 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3743 add_libcall (libcall_htab
,
3744 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3745 add_libcall (libcall_htab
,
3746 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
3747 add_libcall (libcall_htab
,
3748 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
3749 add_libcall (libcall_htab
,
3750 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3751 add_libcall (libcall_htab
,
3752 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3753 add_libcall (libcall_htab
,
3754 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3755 add_libcall (libcall_htab
,
3756 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3758 /* Values from double-precision helper functions are returned in core
3759 registers if the selected core only supports single-precision
3760 arithmetic, even if we are using the hard-float ABI. The same is
3761 true for single-precision helpers, but we will never be using the
3762 hard-float ABI on a CPU which doesn't support single-precision
3763 operations in hardware. */
3764 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
3765 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
3766 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
3767 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
3768 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
3769 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
3770 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
3771 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
3772 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
3773 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
3774 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
3775 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
3777 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
3781 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
3785 arm_libcall_value_1 (enum machine_mode mode
)
3787 if (TARGET_AAPCS_BASED
)
3788 return aapcs_libcall_value (mode
);
3789 else if (TARGET_IWMMXT_ABI
3790 && arm_vector_mode_supported_p (mode
))
3791 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
3793 return gen_rtx_REG (mode
, ARG_REGISTER (1));
3796 /* Define how to find the value returned by a library function
3797 assuming the value has mode MODE. */
3800 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
3802 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
3803 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
3805 /* The following libcalls return their result in integer registers,
3806 even though they return a floating point value. */
3807 if (arm_libcall_uses_aapcs_base (libcall
))
3808 return gen_rtx_REG (mode
, ARG_REGISTER(1));
3812 return arm_libcall_value_1 (mode
);
3815 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3818 arm_function_value_regno_p (const unsigned int regno
)
3820 if (regno
== ARG_REGISTER (1)
3822 && TARGET_AAPCS_BASED
3824 && TARGET_HARD_FLOAT
3825 && regno
== FIRST_VFP_REGNUM
)
3826 || (TARGET_IWMMXT_ABI
3827 && regno
== FIRST_IWMMXT_REGNUM
))
3833 /* Determine the amount of memory needed to store the possible return
3834 registers of an untyped call. */
3836 arm_apply_result_size (void)
3842 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
3844 if (TARGET_IWMMXT_ABI
)
3851 /* Decide whether TYPE should be returned in memory (true)
3852 or in a register (false). FNTYPE is the type of the function making
3855 arm_return_in_memory (const_tree type
, const_tree fntype
)
3859 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3861 if (TARGET_AAPCS_BASED
)
3863 /* Simple, non-aggregate types (ie not including vectors and
3864 complex) are always returned in a register (or registers).
3865 We don't care about which register here, so we can short-cut
3866 some of the detail. */
3867 if (!AGGREGATE_TYPE_P (type
)
3868 && TREE_CODE (type
) != VECTOR_TYPE
3869 && TREE_CODE (type
) != COMPLEX_TYPE
)
3872 /* Any return value that is no larger than one word can be
3874 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3877 /* Check any available co-processors to see if they accept the
3878 type as a register candidate (VFP, for example, can return
3879 some aggregates in consecutive registers). These aren't
3880 available if the call is variadic. */
3881 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3884 /* Vector values should be returned using ARM registers, not
3885 memory (unless they're over 16 bytes, which will break since
3886 we only have four call-clobbered registers to play with). */
3887 if (TREE_CODE (type
) == VECTOR_TYPE
)
3888 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3890 /* The rest go in memory. */
3894 if (TREE_CODE (type
) == VECTOR_TYPE
)
3895 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3897 if (!AGGREGATE_TYPE_P (type
) &&
3898 (TREE_CODE (type
) != VECTOR_TYPE
))
3899 /* All simple types are returned in registers. */
3902 if (arm_abi
!= ARM_ABI_APCS
)
3904 /* ATPCS and later return aggregate types in memory only if they are
3905 larger than a word (or are variable size). */
3906 return (size
< 0 || size
> UNITS_PER_WORD
);
3909 /* For the arm-wince targets we choose to be compatible with Microsoft's
3910 ARM and Thumb compilers, which always return aggregates in memory. */
3912 /* All structures/unions bigger than one word are returned in memory.
3913 Also catch the case where int_size_in_bytes returns -1. In this case
3914 the aggregate is either huge or of variable size, and in either case
3915 we will want to return it via memory and not in a register. */
3916 if (size
< 0 || size
> UNITS_PER_WORD
)
3919 if (TREE_CODE (type
) == RECORD_TYPE
)
3923 /* For a struct the APCS says that we only return in a register
3924 if the type is 'integer like' and every addressable element
3925 has an offset of zero. For practical purposes this means
3926 that the structure can have at most one non bit-field element
3927 and that this element must be the first one in the structure. */
3929 /* Find the first field, ignoring non FIELD_DECL things which will
3930 have been created by C++. */
3931 for (field
= TYPE_FIELDS (type
);
3932 field
&& TREE_CODE (field
) != FIELD_DECL
;
3933 field
= DECL_CHAIN (field
))
3937 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3939 /* Check that the first field is valid for returning in a register. */
3941 /* ... Floats are not allowed */
3942 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3945 /* ... Aggregates that are not themselves valid for returning in
3946 a register are not allowed. */
3947 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3950 /* Now check the remaining fields, if any. Only bitfields are allowed,
3951 since they are not addressable. */
3952 for (field
= DECL_CHAIN (field
);
3954 field
= DECL_CHAIN (field
))
3956 if (TREE_CODE (field
) != FIELD_DECL
)
3959 if (!DECL_BIT_FIELD_TYPE (field
))
3966 if (TREE_CODE (type
) == UNION_TYPE
)
3970 /* Unions can be returned in registers if every element is
3971 integral, or can be returned in an integer register. */
3972 for (field
= TYPE_FIELDS (type
);
3974 field
= DECL_CHAIN (field
))
3976 if (TREE_CODE (field
) != FIELD_DECL
)
3979 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3982 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3988 #endif /* not ARM_WINCE */
3990 /* Return all other types in memory. */
3994 const struct pcs_attribute_arg
3998 } pcs_attribute_args
[] =
4000 {"aapcs", ARM_PCS_AAPCS
},
4001 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4003 /* We could recognize these, but changes would be needed elsewhere
4004 * to implement them. */
4005 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4006 {"atpcs", ARM_PCS_ATPCS
},
4007 {"apcs", ARM_PCS_APCS
},
4009 {NULL
, ARM_PCS_UNKNOWN
}
4013 arm_pcs_from_attribute (tree attr
)
4015 const struct pcs_attribute_arg
*ptr
;
4018 /* Get the value of the argument. */
4019 if (TREE_VALUE (attr
) == NULL_TREE
4020 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4021 return ARM_PCS_UNKNOWN
;
4023 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4025 /* Check it against the list of known arguments. */
4026 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4027 if (streq (arg
, ptr
->arg
))
4030 /* An unrecognized interrupt type. */
4031 return ARM_PCS_UNKNOWN
;
4034 /* Get the PCS variant to use for this call. TYPE is the function's type
4035 specification, DECL is the specific declartion. DECL may be null if
4036 the call could be indirect or if this is a library call. */
4038 arm_get_pcs_model (const_tree type
, const_tree decl
)
4040 bool user_convention
= false;
4041 enum arm_pcs user_pcs
= arm_pcs_default
;
4046 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
4049 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
4050 user_convention
= true;
4053 if (TARGET_AAPCS_BASED
)
4055 /* Detect varargs functions. These always use the base rules
4056 (no argument is ever a candidate for a co-processor
4058 bool base_rules
= stdarg_p (type
);
4060 if (user_convention
)
4062 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
4063 sorry ("non-AAPCS derived PCS variant");
4064 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
4065 error ("variadic functions must use the base AAPCS variant");
4069 return ARM_PCS_AAPCS
;
4070 else if (user_convention
)
4072 else if (decl
&& flag_unit_at_a_time
)
4074 /* Local functions never leak outside this compilation unit,
4075 so we are free to use whatever conventions are
4077 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4078 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4080 return ARM_PCS_AAPCS_LOCAL
;
4083 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
4084 sorry ("PCS variant");
4086 /* For everything else we use the target's default. */
4087 return arm_pcs_default
;
4092 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4093 const_tree fntype ATTRIBUTE_UNUSED
,
4094 rtx libcall ATTRIBUTE_UNUSED
,
4095 const_tree fndecl ATTRIBUTE_UNUSED
)
4097 /* Record the unallocated VFP registers. */
4098 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
4099 pcum
->aapcs_vfp_reg_alloc
= 0;
4102 /* Walk down the type tree of TYPE counting consecutive base elements.
4103 If *MODEP is VOIDmode, then set it to the first valid floating point
4104 type. If a non-floating point type is found, or if a floating point
4105 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4106 otherwise return the count in the sub-tree. */
4108 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
4110 enum machine_mode mode
;
4113 switch (TREE_CODE (type
))
4116 mode
= TYPE_MODE (type
);
4117 if (mode
!= DFmode
&& mode
!= SFmode
)
4120 if (*modep
== VOIDmode
)
4129 mode
= TYPE_MODE (TREE_TYPE (type
));
4130 if (mode
!= DFmode
&& mode
!= SFmode
)
4133 if (*modep
== VOIDmode
)
4142 /* Use V2SImode and V4SImode as representatives of all 64-bit
4143 and 128-bit vector types, whether or not those modes are
4144 supported with the present options. */
4145 size
= int_size_in_bytes (type
);
4158 if (*modep
== VOIDmode
)
4161 /* Vector modes are considered to be opaque: two vectors are
4162 equivalent for the purposes of being homogeneous aggregates
4163 if they are the same size. */
4172 tree index
= TYPE_DOMAIN (type
);
4174 /* Can't handle incomplete types. */
4175 if (!COMPLETE_TYPE_P (type
))
4178 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
4181 || !TYPE_MAX_VALUE (index
)
4182 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
4183 || !TYPE_MIN_VALUE (index
)
4184 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
4188 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
4189 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
4191 /* There must be no padding. */
4192 if (!host_integerp (TYPE_SIZE (type
), 1)
4193 || (tree_low_cst (TYPE_SIZE (type
), 1)
4194 != count
* GET_MODE_BITSIZE (*modep
)))
4206 /* Can't handle incomplete types. */
4207 if (!COMPLETE_TYPE_P (type
))
4210 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
4212 if (TREE_CODE (field
) != FIELD_DECL
)
4215 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
4221 /* There must be no padding. */
4222 if (!host_integerp (TYPE_SIZE (type
), 1)
4223 || (tree_low_cst (TYPE_SIZE (type
), 1)
4224 != count
* GET_MODE_BITSIZE (*modep
)))
4231 case QUAL_UNION_TYPE
:
4233 /* These aren't very interesting except in a degenerate case. */
4238 /* Can't handle incomplete types. */
4239 if (!COMPLETE_TYPE_P (type
))
4242 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
4244 if (TREE_CODE (field
) != FIELD_DECL
)
4247 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
4250 count
= count
> sub_count
? count
: sub_count
;
4253 /* There must be no padding. */
4254 if (!host_integerp (TYPE_SIZE (type
), 1)
4255 || (tree_low_cst (TYPE_SIZE (type
), 1)
4256 != count
* GET_MODE_BITSIZE (*modep
)))
4269 /* Return true if PCS_VARIANT should use VFP registers. */
4271 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
4273 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
4275 static bool seen_thumb1_vfp
= false;
4277 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
4279 sorry ("Thumb-1 hard-float VFP ABI");
4280 /* sorry() is not immediately fatal, so only display this once. */
4281 seen_thumb1_vfp
= true;
4287 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
4290 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
4291 (TARGET_VFP_DOUBLE
|| !is_double
));
4294 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4295 suitable for passing or returning in VFP registers for the PCS
4296 variant selected. If it is, then *BASE_MODE is updated to contain
4297 a machine mode describing each element of the argument's type and
4298 *COUNT to hold the number of such elements. */
4300 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
4301 enum machine_mode mode
, const_tree type
,
4302 enum machine_mode
*base_mode
, int *count
)
4304 enum machine_mode new_mode
= VOIDmode
;
4306 /* If we have the type information, prefer that to working things
4307 out from the mode. */
4310 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
4312 if (ag_count
> 0 && ag_count
<= 4)
4317 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
4318 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
4319 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
4324 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
4327 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
4333 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
4336 *base_mode
= new_mode
;
4341 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
4342 enum machine_mode mode
, const_tree type
)
4344 int count ATTRIBUTE_UNUSED
;
4345 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
4347 if (!use_vfp_abi (pcs_variant
, false))
4349 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4354 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4357 if (!use_vfp_abi (pcum
->pcs_variant
, false))
4360 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
4361 &pcum
->aapcs_vfp_rmode
,
4362 &pcum
->aapcs_vfp_rcount
);
4366 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4367 const_tree type ATTRIBUTE_UNUSED
)
4369 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
4370 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
4373 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
4374 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
4376 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
4377 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4380 int rcount
= pcum
->aapcs_vfp_rcount
;
4382 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
4386 /* Avoid using unsupported vector modes. */
4387 if (rmode
== V2SImode
)
4389 else if (rmode
== V4SImode
)
4396 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
4397 for (i
= 0; i
< rcount
; i
++)
4399 rtx tmp
= gen_rtx_REG (rmode
,
4400 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
4401 tmp
= gen_rtx_EXPR_LIST
4403 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
4404 XVECEXP (par
, 0, i
) = tmp
;
4407 pcum
->aapcs_reg
= par
;
4410 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
4417 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
4418 enum machine_mode mode
,
4419 const_tree type ATTRIBUTE_UNUSED
)
4421 if (!use_vfp_abi (pcs_variant
, false))
4424 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4427 enum machine_mode ag_mode
;
4432 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4437 if (ag_mode
== V2SImode
)
4439 else if (ag_mode
== V4SImode
)
4445 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
4446 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
4447 for (i
= 0; i
< count
; i
++)
4449 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
4450 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
4451 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
4452 XVECEXP (par
, 0, i
) = tmp
;
4458 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
4462 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4463 enum machine_mode mode ATTRIBUTE_UNUSED
,
4464 const_tree type ATTRIBUTE_UNUSED
)
4466 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
4467 pcum
->aapcs_vfp_reg_alloc
= 0;
4471 #define AAPCS_CP(X) \
4473 aapcs_ ## X ## _cum_init, \
4474 aapcs_ ## X ## _is_call_candidate, \
4475 aapcs_ ## X ## _allocate, \
4476 aapcs_ ## X ## _is_return_candidate, \
4477 aapcs_ ## X ## _allocate_return_reg, \
4478 aapcs_ ## X ## _advance \
4481 /* Table of co-processors that can be used to pass arguments in
4482 registers. Idealy no arugment should be a candidate for more than
4483 one co-processor table entry, but the table is processed in order
4484 and stops after the first match. If that entry then fails to put
4485 the argument into a co-processor register, the argument will go on
4489 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4490 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4492 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4493 BLKmode) is a candidate for this co-processor's registers; this
4494 function should ignore any position-dependent state in
4495 CUMULATIVE_ARGS and only use call-type dependent information. */
4496 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4498 /* Return true if the argument does get a co-processor register; it
4499 should set aapcs_reg to an RTX of the register allocated as is
4500 required for a return from FUNCTION_ARG. */
4501 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4503 /* Return true if a result of mode MODE (or type TYPE if MODE is
4504 BLKmode) is can be returned in this co-processor's registers. */
4505 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4507 /* Allocate and return an RTX element to hold the return type of a
4508 call, this routine must not fail and will only be called if
4509 is_return_candidate returned true with the same parameters. */
4510 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4512 /* Finish processing this argument and prepare to start processing
4514 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4515 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4523 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4528 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4529 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4536 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4538 /* We aren't passed a decl, so we can't check that a call is local.
4539 However, it isn't clear that that would be a win anyway, since it
4540 might limit some tail-calling opportunities. */
4541 enum arm_pcs pcs_variant
;
4545 const_tree fndecl
= NULL_TREE
;
4547 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4550 fntype
= TREE_TYPE (fntype
);
4553 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4556 pcs_variant
= arm_pcs_default
;
4558 if (pcs_variant
!= ARM_PCS_AAPCS
)
4562 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4563 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4572 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4575 /* We aren't passed a decl, so we can't check that a call is local.
4576 However, it isn't clear that that would be a win anyway, since it
4577 might limit some tail-calling opportunities. */
4578 enum arm_pcs pcs_variant
;
4579 int unsignedp ATTRIBUTE_UNUSED
;
4583 const_tree fndecl
= NULL_TREE
;
4585 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4588 fntype
= TREE_TYPE (fntype
);
4591 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4594 pcs_variant
= arm_pcs_default
;
4596 /* Promote integer types. */
4597 if (type
&& INTEGRAL_TYPE_P (type
))
4598 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4600 if (pcs_variant
!= ARM_PCS_AAPCS
)
4604 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4605 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4607 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4611 /* Promotes small structs returned in a register to full-word size
4612 for big-endian AAPCS. */
4613 if (type
&& arm_return_in_msb (type
))
4615 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4616 if (size
% UNITS_PER_WORD
!= 0)
4618 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4619 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4623 return gen_rtx_REG (mode
, R0_REGNUM
);
4627 aapcs_libcall_value (enum machine_mode mode
)
4629 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
4630 && GET_MODE_SIZE (mode
) <= 4)
4633 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
4636 /* Lay out a function argument using the AAPCS rules. The rule
4637 numbers referred to here are those in the AAPCS. */
4639 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4640 const_tree type
, bool named
)
4645 /* We only need to do this once per argument. */
4646 if (pcum
->aapcs_arg_processed
)
4649 pcum
->aapcs_arg_processed
= true;
4651 /* Special case: if named is false then we are handling an incoming
4652 anonymous argument which is on the stack. */
4656 /* Is this a potential co-processor register candidate? */
4657 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4659 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
4660 pcum
->aapcs_cprc_slot
= slot
;
4662 /* We don't have to apply any of the rules from part B of the
4663 preparation phase, these are handled elsewhere in the
4668 /* A Co-processor register candidate goes either in its own
4669 class of registers or on the stack. */
4670 if (!pcum
->aapcs_cprc_failed
[slot
])
4672 /* C1.cp - Try to allocate the argument to co-processor
4674 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
4677 /* C2.cp - Put the argument on the stack and note that we
4678 can't assign any more candidates in this slot. We also
4679 need to note that we have allocated stack space, so that
4680 we won't later try to split a non-cprc candidate between
4681 core registers and the stack. */
4682 pcum
->aapcs_cprc_failed
[slot
] = true;
4683 pcum
->can_split
= false;
4686 /* We didn't get a register, so this argument goes on the
4688 gcc_assert (pcum
->can_split
== false);
4693 /* C3 - For double-word aligned arguments, round the NCRN up to the
4694 next even number. */
4695 ncrn
= pcum
->aapcs_ncrn
;
4696 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
4699 nregs
= ARM_NUM_REGS2(mode
, type
);
4701 /* Sigh, this test should really assert that nregs > 0, but a GCC
4702 extension allows empty structs and then gives them empty size; it
4703 then allows such a structure to be passed by value. For some of
4704 the code below we have to pretend that such an argument has
4705 non-zero size so that we 'locate' it correctly either in
4706 registers or on the stack. */
4707 gcc_assert (nregs
>= 0);
4709 nregs2
= nregs
? nregs
: 1;
4711 /* C4 - Argument fits entirely in core registers. */
4712 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
4714 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4715 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
4719 /* C5 - Some core registers left and there are no arguments already
4720 on the stack: split this argument between the remaining core
4721 registers and the stack. */
4722 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
4724 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4725 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4726 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
4730 /* C6 - NCRN is set to 4. */
4731 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4733 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4737 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4738 for a call to a function whose data type is FNTYPE.
4739 For a library call, FNTYPE is NULL. */
4741 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
4743 tree fndecl ATTRIBUTE_UNUSED
)
4745 /* Long call handling. */
4747 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4749 pcum
->pcs_variant
= arm_pcs_default
;
4751 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4753 if (arm_libcall_uses_aapcs_base (libname
))
4754 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
4756 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
4757 pcum
->aapcs_reg
= NULL_RTX
;
4758 pcum
->aapcs_partial
= 0;
4759 pcum
->aapcs_arg_processed
= false;
4760 pcum
->aapcs_cprc_slot
= -1;
4761 pcum
->can_split
= true;
4763 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4767 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4769 pcum
->aapcs_cprc_failed
[i
] = false;
4770 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
4778 /* On the ARM, the offset starts at 0. */
4780 pcum
->iwmmxt_nregs
= 0;
4781 pcum
->can_split
= true;
4783 /* Varargs vectors are treated the same as long long.
4784 named_count avoids having to change the way arm handles 'named' */
4785 pcum
->named_count
= 0;
4788 if (TARGET_REALLY_IWMMXT
&& fntype
)
4792 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
4794 fn_arg
= TREE_CHAIN (fn_arg
))
4795 pcum
->named_count
+= 1;
4797 if (! pcum
->named_count
)
4798 pcum
->named_count
= INT_MAX
;
4803 /* Return true if mode/type need doubleword alignment. */
4805 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
4807 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
4808 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
4812 /* Determine where to put an argument to a function.
4813 Value is zero to push the argument on the stack,
4814 or a hard register in which to store the argument.
4816 MODE is the argument's machine mode.
4817 TYPE is the data type of the argument (as a tree).
4818 This is null for libcalls where that information may
4820 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4821 the preceding args and about the function being called.
4822 NAMED is nonzero if this argument is a named parameter
4823 (otherwise it is an extra parameter matching an ellipsis).
4825 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4826 other arguments are passed on the stack. If (NAMED == 0) (which happens
4827 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4828 defined), say it is passed in the stack (function_prologue will
4829 indeed make it pass in the stack if necessary). */
4832 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
4833 const_tree type
, bool named
)
4835 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4838 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4839 a call insn (op3 of a call_value insn). */
4840 if (mode
== VOIDmode
)
4843 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4845 aapcs_layout_arg (pcum
, mode
, type
, named
);
4846 return pcum
->aapcs_reg
;
4849 /* Varargs vectors are treated the same as long long.
4850 named_count avoids having to change the way arm handles 'named' */
4851 if (TARGET_IWMMXT_ABI
4852 && arm_vector_mode_supported_p (mode
)
4853 && pcum
->named_count
> pcum
->nargs
+ 1)
4855 if (pcum
->iwmmxt_nregs
<= 9)
4856 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4859 pcum
->can_split
= false;
4864 /* Put doubleword aligned quantities in even register pairs. */
4866 && ARM_DOUBLEWORD_ALIGN
4867 && arm_needs_doubleword_align (mode
, type
))
4870 /* Only allow splitting an arg between regs and memory if all preceding
4871 args were allocated to regs. For args passed by reference we only count
4872 the reference pointer. */
4873 if (pcum
->can_split
)
4876 nregs
= ARM_NUM_REGS2 (mode
, type
);
4878 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4881 return gen_rtx_REG (mode
, pcum
->nregs
);
4885 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
4887 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
4888 ? DOUBLEWORD_ALIGNMENT
4893 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
4894 tree type
, bool named
)
4896 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4897 int nregs
= pcum
->nregs
;
4899 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4901 aapcs_layout_arg (pcum
, mode
, type
, named
);
4902 return pcum
->aapcs_partial
;
4905 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4908 if (NUM_ARG_REGS
> nregs
4909 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4911 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
4916 /* Update the data in PCUM to advance over an argument
4917 of mode MODE and data type TYPE.
4918 (TYPE is null for libcalls where that information may not be available.) */
4921 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
4922 const_tree type
, bool named
)
4924 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4926 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4928 aapcs_layout_arg (pcum
, mode
, type
, named
);
4930 if (pcum
->aapcs_cprc_slot
>= 0)
4932 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4934 pcum
->aapcs_cprc_slot
= -1;
4937 /* Generic stuff. */
4938 pcum
->aapcs_arg_processed
= false;
4939 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4940 pcum
->aapcs_reg
= NULL_RTX
;
4941 pcum
->aapcs_partial
= 0;
4946 if (arm_vector_mode_supported_p (mode
)
4947 && pcum
->named_count
> pcum
->nargs
4948 && TARGET_IWMMXT_ABI
)
4949 pcum
->iwmmxt_nregs
+= 1;
4951 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
4955 /* Variable sized types are passed by reference. This is a GCC
4956 extension to the ARM ABI. */
4959 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
4960 enum machine_mode mode ATTRIBUTE_UNUSED
,
4961 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4963 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;
4977 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4979 arm_pragma_long_calls
= LONG
;
4983 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4985 arm_pragma_long_calls
= SHORT
;
4989 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4991 arm_pragma_long_calls
= OFF
;
4994 /* Handle an attribute requiring a FUNCTION_DECL;
4995 arguments as in struct attribute_spec.handler. */
4997 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
4998 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5000 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5002 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5004 *no_add_attrs
= true;
5010 /* Handle an "interrupt" or "isr" attribute;
5011 arguments as in struct attribute_spec.handler. */
5013 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5018 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5020 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5022 *no_add_attrs
= true;
5024 /* FIXME: the argument if any is checked for type attributes;
5025 should it be checked for decl ones? */
5029 if (TREE_CODE (*node
) == FUNCTION_TYPE
5030 || TREE_CODE (*node
) == METHOD_TYPE
)
5032 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5034 warning (OPT_Wattributes
, "%qE attribute ignored",
5036 *no_add_attrs
= true;
5039 else if (TREE_CODE (*node
) == POINTER_TYPE
5040 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
5041 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
5042 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
5044 *node
= build_variant_type_copy (*node
);
5045 TREE_TYPE (*node
) = build_type_attribute_variant
5047 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
5048 *no_add_attrs
= true;
5052 /* Possibly pass this attribute on from the type to a decl. */
5053 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
5054 | (int) ATTR_FLAG_FUNCTION_NEXT
5055 | (int) ATTR_FLAG_ARRAY_NEXT
))
5057 *no_add_attrs
= true;
5058 return tree_cons (name
, args
, NULL_TREE
);
5062 warning (OPT_Wattributes
, "%qE attribute ignored",
5071 /* Handle a "pcs" attribute; arguments as in struct
5072 attribute_spec.handler. */
5074 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
5075 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5077 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
5079 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
5080 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      /* Mark the type's name decl as having hidden visibility.  */
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
5110 /* Return 0 if the attributes for two types are incompatible, 1 if they
5111 are compatible, and 2 if they are nearly compatible (which causes a
5112 warning to be generated). */
5114 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
5118 /* Check for mismatch of non-default calling convention. */
5119 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
5122 /* Check for mismatched call attributes. */
5123 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5124 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5125 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5126 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5128 /* Only bother to check if an attribute is defined. */
5129 if (l1
| l2
| s1
| s2
)
5131 /* If one type has an attribute, the other must have the same attribute. */
5132 if ((l1
!= l2
) || (s1
!= s2
))
5135 /* Disallow mixed attributes. */
5136 if ((l1
& s2
) || (l2
& s1
))
5140 /* Check for mismatched ISR attribute. */
5141 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
5143 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
5144 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
5146 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
5153 /* Assigns default attributes to newly defined type. This is used to
5154 set short_call/long_call attributes for function types of
5155 functions defined inside corresponding #pragma scopes. */
5157 arm_set_default_type_attributes (tree type
)
5159 /* Add __attribute__ ((long_call)) to all functions, when
5160 inside #pragma long_calls or __attribute__ ((short_call)),
5161 when inside #pragma no_long_calls. */
5162 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
5164 tree type_attr_list
, attr_name
;
5165 type_attr_list
= TYPE_ATTRIBUTES (type
);
5167 if (arm_pragma_long_calls
== LONG
)
5168 attr_name
= get_identifier ("long_call");
5169 else if (arm_pragma_long_calls
== SHORT
)
5170 attr_name
= get_identifier ("short_call");
5174 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
5175 TYPE_ATTRIBUTES (type
) = type_attr_list
;
5179 /* Return true if DECL is known to be linked into section SECTION. */
5182 arm_function_in_section_p (tree decl
, section
*section
)
5184 /* We can only be certain about functions defined in the same
5185 compilation unit. */
5186 if (!TREE_STATIC (decl
))
5189 /* Make sure that SYMBOL always binds to the definition in this
5190 compilation unit. */
5191 if (!targetm
.binds_local_p (decl
))
5194 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5195 if (!DECL_SECTION_NAME (decl
))
5197 /* Make sure that we will not create a unique section for DECL. */
5198 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
5202 return function_section (decl
) == section
;
5205 /* Return nonzero if a 32-bit "long_call" should be generated for
5206 a call from the current function to DECL. We generate a long_call
5209 a. has an __attribute__((long call))
5210 or b. is within the scope of a #pragma long_calls
5211 or c. the -mlong-calls command line switch has been specified
5213 However we do not generate a long call if the function:
5215 d. has an __attribute__ ((short_call))
5216 or e. is inside the scope of a #pragma no_long_calls
5217 or f. is defined in the same section as the current function. */
5220 arm_is_long_call_p (tree decl
)
5225 return TARGET_LONG_CALLS
;
5227 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
5228 if (lookup_attribute ("short_call", attrs
))
5231 /* For "f", be conservative, and only cater for cases in which the
5232 whole of the current function is placed in the same section. */
5233 if (!flag_reorder_blocks_and_partition
5234 && TREE_CODE (decl
) == FUNCTION_DECL
5235 && arm_function_in_section_p (decl
, current_function_section ()))
5238 if (lookup_attribute ("long_call", attrs
))
5241 return TARGET_LONG_CALLS
;
5244 /* Return nonzero if it is ok to make a tail-call to DECL. */
5246 arm_function_ok_for_sibcall (tree decl
, tree exp
)
5248 unsigned long func_type
;
5250 if (cfun
->machine
->sibcall_blocked
)
5253 /* Never tailcall something for which we have no decl, or if we
5254 are generating code for Thumb-1. */
5255 if (decl
== NULL
|| TARGET_THUMB1
)
5258 /* The PIC register is live on entry to VxWorks PLT entries, so we
5259 must make the call before restoring the PIC register. */
5260 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
5263 /* Cannot tail-call to long calls, since these are out of range of
5264 a branch instruction. */
5265 if (arm_is_long_call_p (decl
))
5268 /* If we are interworking and the function is not declared static
5269 then we can't tail-call it unless we know that it exists in this
5270 compilation unit (since it might be a Thumb routine). */
5271 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
5274 func_type
= arm_current_func_type ();
5275 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5276 if (IS_INTERRUPT (func_type
))
5279 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5281 /* Check that the return value locations are the same. For
5282 example that we aren't returning a value from the sibling in
5283 a VFP register but then need to transfer it to a core
5287 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
5288 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5290 if (!rtx_equal_p (a
, b
))
5294 /* Never tailcall if function may be called with a misaligned SP. */
5295 if (IS_STACKALIGN (func_type
))
5298 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5299 references should become a NOP. Don't convert such calls into
5301 if (TARGET_AAPCS_BASED
5302 && arm_abi
== ARM_ABI_AAPCS
5303 && DECL_WEAK (decl
))
5306 /* Everything else is ok. */
5311 /* Addressing mode support functions. */
5313 /* Return nonzero if X is a legitimate immediate operand when compiling
5314 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5316 legitimate_pic_operand_p (rtx x
)
5318 if (GET_CODE (x
) == SYMBOL_REF
5319 || (GET_CODE (x
) == CONST
5320 && GET_CODE (XEXP (x
, 0)) == PLUS
5321 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
5327 /* Record that the current function needs a PIC register. Initialize
5328 cfun->machine->pic_reg if we have not already done so. */
5331 require_pic_register (void)
5333 /* A lot of the logic here is made obscure by the fact that this
5334 routine gets called as part of the rtx cost estimation process.
5335 We don't want those calls to affect any assumptions about the real
5336 function; and further, we can't call entry_of_function() until we
5337 start the real expansion process. */
5338 if (!crtl
->uses_pic_offset_table
)
5340 gcc_assert (can_create_pseudo_p ());
5341 if (arm_pic_register
!= INVALID_REGNUM
)
5343 if (!cfun
->machine
->pic_reg
)
5344 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
5346 /* Play games to avoid marking the function as needing pic
5347 if we are being called as part of the cost-estimation
5349 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5350 crtl
->uses_pic_offset_table
= 1;
5356 if (!cfun
->machine
->pic_reg
)
5357 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
5359 /* Play games to avoid marking the function as needing pic
5360 if we are being called as part of the cost-estimation
5362 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5364 crtl
->uses_pic_offset_table
= 1;
5367 arm_load_pic_register (0UL);
5372 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
5374 INSN_LOCATION (insn
) = prologue_location
;
5376 /* We can be called during expansion of PHI nodes, where
5377 we can't yet emit instructions directly in the final
5378 insn stream. Queue the insns on the entry edge, they will
5379 be committed after everything else is expanded. */
5380 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
5387 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
5389 if (GET_CODE (orig
) == SYMBOL_REF
5390 || GET_CODE (orig
) == LABEL_REF
)
5396 gcc_assert (can_create_pseudo_p ());
5397 reg
= gen_reg_rtx (Pmode
);
5400 /* VxWorks does not impose a fixed gap between segments; the run-time
5401 gap can be different from the object-file gap. We therefore can't
5402 use GOTOFF unless we are absolutely sure that the symbol is in the
5403 same segment as the GOT. Unfortunately, the flexibility of linker
5404 scripts means that we can't be sure of that in general, so assume
5405 that GOTOFF is never valid on VxWorks. */
5406 if ((GET_CODE (orig
) == LABEL_REF
5407 || (GET_CODE (orig
) == SYMBOL_REF
&&
5408 SYMBOL_REF_LOCAL_P (orig
)))
5410 && !TARGET_VXWORKS_RTP
)
5411 insn
= arm_pic_static_addr (orig
, reg
);
5417 /* If this function doesn't have a pic register, create one now. */
5418 require_pic_register ();
5420 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
5422 /* Make the MEM as close to a constant as possible. */
5423 mem
= SET_SRC (pat
);
5424 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
5425 MEM_READONLY_P (mem
) = 1;
5426 MEM_NOTRAP_P (mem
) = 1;
5428 insn
= emit_insn (pat
);
5431 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5433 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
5437 else if (GET_CODE (orig
) == CONST
)
5441 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5442 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
5445 /* Handle the case where we have: const (UNSPEC_TLS). */
5446 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
5447 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
5450 /* Handle the case where we have:
5451 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5453 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5454 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
5455 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
5457 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
5463 gcc_assert (can_create_pseudo_p ());
5464 reg
= gen_reg_rtx (Pmode
);
5467 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
5469 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
5470 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
5471 base
== reg
? 0 : reg
);
5473 if (CONST_INT_P (offset
))
5475 /* The base register doesn't really matter, we only want to
5476 test the index for the appropriate mode. */
5477 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
5479 gcc_assert (can_create_pseudo_p ());
5480 offset
= force_reg (Pmode
, offset
);
5483 if (CONST_INT_P (offset
))
5484 return plus_constant (Pmode
, base
, INTVAL (offset
));
5487 if (GET_MODE_SIZE (mode
) > 4
5488 && (GET_MODE_CLASS (mode
) == MODE_INT
5489 || TARGET_SOFT_FLOAT
))
5491 emit_insn (gen_addsi3 (reg
, base
, offset
));
5495 return gen_rtx_PLUS (Pmode
, base
, offset
);
5502 /* Find a spare register to use during the prolog of a function. */
5505 thumb_find_work_register (unsigned long pushed_regs_mask
)
5509 /* Check the argument registers first as these are call-used. The
5510 register allocation order means that sometimes r3 might be used
5511 but earlier argument registers might not, so check them all. */
5512 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5513 if (!df_regs_ever_live_p (reg
))
5516 /* Before going on to check the call-saved registers we can try a couple
5517 more ways of deducing that r3 is available. The first is when we are
5518 pushing anonymous arguments onto the stack and we have less than 4
5519 registers worth of fixed arguments(*). In this case r3 will be part of
5520 the variable argument list and so we can be sure that it will be
5521 pushed right at the start of the function. Hence it will be available
5522 for the rest of the prologue.
5523 (*): ie crtl->args.pretend_args_size is greater than 0. */
5524 if (cfun
->machine
->uses_anonymous_args
5525 && crtl
->args
.pretend_args_size
> 0)
5526 return LAST_ARG_REGNUM
;
5528 /* The other case is when we have fixed arguments but less than 4 registers
5529 worth. In this case r3 might be used in the body of the function, but
5530 it is not being used to convey an argument into the function. In theory
5531 we could just check crtl->args.size to see how many bytes are
5532 being passed in argument registers, but it seems that it is unreliable.
5533 Sometimes it will have the value 0 when in fact arguments are being
5534 passed. (See testcase execute/20021111-1.c for an example). So we also
5535 check the args_info.nregs field as well. The problem with this field is
5536 that it makes no allowances for arguments that are passed to the
5537 function but which are not used. Hence we could miss an opportunity
5538 when a function has an unused argument in r3. But it is better to be
5539 safe than to be sorry. */
5540 if (! cfun
->machine
->uses_anonymous_args
5541 && crtl
->args
.size
>= 0
5542 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5543 && crtl
->args
.info
.nregs
< 4)
5544 return LAST_ARG_REGNUM
;
5546 /* Otherwise look for a call-saved register that is going to be pushed. */
5547 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5548 if (pushed_regs_mask
& (1 << reg
))
5553 /* Thumb-2 can use high regs. */
5554 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5555 if (pushed_regs_mask
& (1 << reg
))
5558 /* Something went wrong - thumb_compute_save_reg_mask()
5559 should have arranged for a suitable register to be pushed. */
/* Counter used to generate unique labels for PIC address computations;
   values are wrapped in UNSPEC_PIC_LABEL operands rather than emitted
   as real code-stream labels (see arm_load_pic_register and
   arm_pic_static_addr).  */
5563 static GTY(()) int pic_labelno
;
5565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5571 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5573 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5576 gcc_assert (flag_pic
);
5578 pic_reg
= cfun
->machine
->pic_reg
;
5579 if (TARGET_VXWORKS_RTP
)
5581 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5582 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5583 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5585 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5587 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5588 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5592 /* We use an UNSPEC rather than a LABEL_REF because this label
5593 never appears in the code stream. */
5595 labelno
= GEN_INT (pic_labelno
++);
5596 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5597 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5599 /* On the ARM the PC register contains 'dot + 8' at the time of the
5600 addition, on the Thumb it is 'dot + 4'. */
5601 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
5602 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5604 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5608 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
5610 else /* TARGET_THUMB1 */
5612 if (arm_pic_register
!= INVALID_REGNUM
5613 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5615 /* We will have pushed the pic register, so we should always be
5616 able to find a work register. */
5617 pic_tmp
= gen_rtx_REG (SImode
,
5618 thumb_find_work_register (saved_regs
));
5619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5620 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5621 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5624 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
5628 /* Need to emit this whether or not we obey regdecls,
5629 since setjmp/longjmp can cause life info to screw up. */
5633 /* Generate code to load the address of a static var when flag_pic is set. */
5635 arm_pic_static_addr (rtx orig
, rtx reg
)
5637 rtx l1
, labelno
, offset_rtx
, insn
;
5639 gcc_assert (flag_pic
);
5641 /* We use an UNSPEC rather than a LABEL_REF because this label
5642 never appears in the code stream. */
5643 labelno
= GEN_INT (pic_labelno
++);
5644 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5645 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5647 /* On the ARM the PC register contains 'dot + 8' at the time of the
5648 addition, on the Thumb it is 'dot + 4'. */
5649 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
5650 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5651 UNSPEC_SYMBOL_OFFSET
);
5652 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5654 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
5658 /* Return nonzero if X is valid as an ARM state addressing register. */
5660 arm_address_register_rtx_p (rtx x
, int strict_p
)
5670 return ARM_REGNO_OK_FOR_BASE_P (regno
);
5672 return (regno
<= LAST_ARM_REGNUM
5673 || regno
>= FIRST_PSEUDO_REGISTER
5674 || regno
== FRAME_POINTER_REGNUM
5675 || regno
== ARG_POINTER_REGNUM
);
5678 /* Return TRUE if this rtx is the difference of a symbol and a label,
5679 and will reduce to a PC-relative relocation in the object file.
5680 Expressions like this can be left alone when generating PIC, rather
5681 than forced through the GOT. */
5683 pcrel_constant_p (rtx x
)
5685 if (GET_CODE (x
) == MINUS
)
5686 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
5691 /* Return true if X will surely end up in an index register after next
5694 will_be_in_index_register (const_rtx x
)
5696 /* arm.md: calculate_pic_address will split this into a register. */
5697 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
5700 /* Return nonzero if X is a valid ARM state address operand. */
5702 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5706 enum rtx_code code
= GET_CODE (x
);
5708 if (arm_address_register_rtx_p (x
, strict_p
))
5711 use_ldrd
= (TARGET_LDRD
5713 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5715 if (code
== POST_INC
|| code
== PRE_DEC
5716 || ((code
== PRE_INC
|| code
== POST_DEC
)
5717 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5718 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5720 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5721 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5722 && GET_CODE (XEXP (x
, 1)) == PLUS
5723 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5725 rtx addend
= XEXP (XEXP (x
, 1), 1);
5727 /* Don't allow ldrd post increment by register because it's hard
5728 to fixup invalid register choices. */
5730 && GET_CODE (x
) == POST_MODIFY
5734 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5735 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5738 /* After reload constants split into minipools will have addresses
5739 from a LABEL_REF. */
5740 else if (reload_completed
5741 && (code
== LABEL_REF
5743 && GET_CODE (XEXP (x
, 0)) == PLUS
5744 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5745 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
5748 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5751 else if (code
== PLUS
)
5753 rtx xop0
= XEXP (x
, 0);
5754 rtx xop1
= XEXP (x
, 1);
5756 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5757 && ((CONST_INT_P (xop1
)
5758 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5759 || (!strict_p
&& will_be_in_index_register (xop1
))))
5760 || (arm_address_register_rtx_p (xop1
, strict_p
)
5761 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5765 /* Reload currently can't handle MINUS, so disable this for now */
5766 else if (GET_CODE (x
) == MINUS
)
5768 rtx xop0
= XEXP (x
, 0);
5769 rtx xop1
= XEXP (x
, 1);
5771 return (arm_address_register_rtx_p (xop0
, strict_p
)
5772 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5776 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5777 && code
== SYMBOL_REF
5778 && CONSTANT_POOL_ADDRESS_P (x
)
5780 && symbol_mentioned_p (get_pool_constant (x
))
5781 && ! pcrel_constant_p (get_pool_constant (x
))))
5787 /* Return nonzero if X is a valid Thumb-2 address operand. */
5789 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5792 enum rtx_code code
= GET_CODE (x
);
5794 if (arm_address_register_rtx_p (x
, strict_p
))
5797 use_ldrd
= (TARGET_LDRD
5799 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5801 if (code
== POST_INC
|| code
== PRE_DEC
5802 || ((code
== PRE_INC
|| code
== POST_DEC
)
5803 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5804 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5806 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5807 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5808 && GET_CODE (XEXP (x
, 1)) == PLUS
5809 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5811 /* Thumb-2 only has autoincrement by constant. */
5812 rtx addend
= XEXP (XEXP (x
, 1), 1);
5813 HOST_WIDE_INT offset
;
5815 if (!CONST_INT_P (addend
))
5818 offset
= INTVAL(addend
);
5819 if (GET_MODE_SIZE (mode
) <= 4)
5820 return (offset
> -256 && offset
< 256);
5822 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5823 && (offset
& 3) == 0);
5826 /* After reload constants split into minipools will have addresses
5827 from a LABEL_REF. */
5828 else if (reload_completed
5829 && (code
== LABEL_REF
5831 && GET_CODE (XEXP (x
, 0)) == PLUS
5832 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5833 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
5836 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5839 else if (code
== PLUS
)
5841 rtx xop0
= XEXP (x
, 0);
5842 rtx xop1
= XEXP (x
, 1);
5844 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5845 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
5846 || (!strict_p
&& will_be_in_index_register (xop1
))))
5847 || (arm_address_register_rtx_p (xop1
, strict_p
)
5848 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5851 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5852 && code
== SYMBOL_REF
5853 && CONSTANT_POOL_ADDRESS_P (x
)
5855 && symbol_mentioned_p (get_pool_constant (x
))
5856 && ! pcrel_constant_p (get_pool_constant (x
))))
5862 /* Return nonzero if INDEX is valid for an address index operand in
5865 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5868 HOST_WIDE_INT range
;
5869 enum rtx_code code
= GET_CODE (index
);
5871 /* Standard coprocessor addressing modes. */
5872 if (TARGET_HARD_FLOAT
5874 && (mode
== SFmode
|| mode
== DFmode
))
5875 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5876 && INTVAL (index
) > -1024
5877 && (INTVAL (index
) & 3) == 0);
5879 /* For quad modes, we restrict the constant offset to be slightly less
5880 than what the instruction format permits. We do this because for
5881 quad mode moves, we will actually decompose them into two separate
5882 double-mode reads or writes. INDEX must therefore be a valid
5883 (double-mode) offset and so should INDEX+8. */
5884 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5885 return (code
== CONST_INT
5886 && INTVAL (index
) < 1016
5887 && INTVAL (index
) > -1024
5888 && (INTVAL (index
) & 3) == 0);
5890 /* We have no such constraint on double mode offsets, so we permit the
5891 full range of the instruction format. */
5892 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5893 return (code
== CONST_INT
5894 && INTVAL (index
) < 1024
5895 && INTVAL (index
) > -1024
5896 && (INTVAL (index
) & 3) == 0);
5898 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5899 return (code
== CONST_INT
5900 && INTVAL (index
) < 1024
5901 && INTVAL (index
) > -1024
5902 && (INTVAL (index
) & 3) == 0);
5904 if (arm_address_register_rtx_p (index
, strict_p
)
5905 && (GET_MODE_SIZE (mode
) <= 4))
5908 if (mode
== DImode
|| mode
== DFmode
)
5910 if (code
== CONST_INT
)
5912 HOST_WIDE_INT val
= INTVAL (index
);
5915 return val
> -256 && val
< 256;
5917 return val
> -4096 && val
< 4092;
5920 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5923 if (GET_MODE_SIZE (mode
) <= 4
5927 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5931 rtx xiop0
= XEXP (index
, 0);
5932 rtx xiop1
= XEXP (index
, 1);
5934 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5935 && power_of_two_operand (xiop1
, SImode
))
5936 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5937 && power_of_two_operand (xiop0
, SImode
)));
5939 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5940 || code
== ASHIFT
|| code
== ROTATERT
)
5942 rtx op
= XEXP (index
, 1);
5944 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5947 && INTVAL (op
) <= 31);
5951 /* For ARM v4 we may be doing a sign-extend operation during the
5957 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5963 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5965 return (code
== CONST_INT
5966 && INTVAL (index
) < range
5967 && INTVAL (index
) > -range
);
5970 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5971 index operand. i.e. 1, 2, 4 or 8. */
5973 thumb2_index_mul_operand (rtx op
)
5977 if (!CONST_INT_P (op
))
5981 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
5984 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5986 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5988 enum rtx_code code
= GET_CODE (index
);
5990 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5991 /* Standard coprocessor addressing modes. */
5992 if (TARGET_HARD_FLOAT
5994 && (mode
== SFmode
|| mode
== DFmode
))
5995 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5996 /* Thumb-2 allows only > -256 index range for it's core register
5997 load/stores. Since we allow SF/DF in core registers, we have
5998 to use the intersection between -256~4096 (core) and -1024~1024
6000 && INTVAL (index
) > -256
6001 && (INTVAL (index
) & 3) == 0);
6003 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6005 /* For DImode assume values will usually live in core regs
6006 and only allow LDRD addressing modes. */
6007 if (!TARGET_LDRD
|| mode
!= DImode
)
6008 return (code
== CONST_INT
6009 && INTVAL (index
) < 1024
6010 && INTVAL (index
) > -1024
6011 && (INTVAL (index
) & 3) == 0);
6014 /* For quad modes, we restrict the constant offset to be slightly less
6015 than what the instruction format permits. We do this because for
6016 quad mode moves, we will actually decompose them into two separate
6017 double-mode reads or writes. INDEX must therefore be a valid
6018 (double-mode) offset and so should INDEX+8. */
6019 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6020 return (code
== CONST_INT
6021 && INTVAL (index
) < 1016
6022 && INTVAL (index
) > -1024
6023 && (INTVAL (index
) & 3) == 0);
6025 /* We have no such constraint on double mode offsets, so we permit the
6026 full range of the instruction format. */
6027 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6028 return (code
== CONST_INT
6029 && INTVAL (index
) < 1024
6030 && INTVAL (index
) > -1024
6031 && (INTVAL (index
) & 3) == 0);
6033 if (arm_address_register_rtx_p (index
, strict_p
)
6034 && (GET_MODE_SIZE (mode
) <= 4))
6037 if (mode
== DImode
|| mode
== DFmode
)
6039 if (code
== CONST_INT
)
6041 HOST_WIDE_INT val
= INTVAL (index
);
6042 /* ??? Can we assume ldrd for thumb2? */
6043 /* Thumb-2 ldrd only has reg+const addressing modes. */
6044 /* ldrd supports offsets of +-1020.
6045 However the ldr fallback does not. */
6046 return val
> -256 && val
< 256 && (val
& 3) == 0;
6054 rtx xiop0
= XEXP (index
, 0);
6055 rtx xiop1
= XEXP (index
, 1);
6057 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6058 && thumb2_index_mul_operand (xiop1
))
6059 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6060 && thumb2_index_mul_operand (xiop0
)));
6062 else if (code
== ASHIFT
)
6064 rtx op
= XEXP (index
, 1);
6066 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6069 && INTVAL (op
) <= 3);
6072 return (code
== CONST_INT
6073 && INTVAL (index
) < 4096
6074 && INTVAL (index
) > -256);
6077 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6079 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
6089 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
6091 return (regno
<= LAST_LO_REGNUM
6092 || regno
> LAST_VIRTUAL_REGISTER
6093 || regno
== FRAME_POINTER_REGNUM
6094 || (GET_MODE_SIZE (mode
) >= 4
6095 && (regno
== STACK_POINTER_REGNUM
6096 || regno
>= FIRST_PSEUDO_REGISTER
6097 || x
== hard_frame_pointer_rtx
6098 || x
== arg_pointer_rtx
)));
6101 /* Return nonzero if x is a legitimate index register. This is the case
6102 for any base register that can access a QImode object. */
6104 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
6106 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
6109 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6111 The AP may be eliminated to either the SP or the FP, so we use the
6112 least common denominator, e.g. SImode, and offsets from 0 to 64.
6114 ??? Verify whether the above is the right approach.
6116 ??? Also, the FP may be eliminated to the SP, so perhaps that
6117 needs special handling also.
6119 ??? Look at how the mips16 port solves this problem. It probably uses
6120 better ways to solve some of these problems.
6122 Although it is not incorrect, we don't accept QImode and HImode
6123 addresses based on the frame pointer or arg pointer until the
6124 reload pass starts. This is so that eliminating such addresses
6125 into stack based ones won't produce impossible code. */
6127 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6129 /* ??? Not clear if this is right. Experiment. */
6130 if (GET_MODE_SIZE (mode
) < 4
6131 && !(reload_in_progress
|| reload_completed
)
6132 && (reg_mentioned_p (frame_pointer_rtx
, x
)
6133 || reg_mentioned_p (arg_pointer_rtx
, x
)
6134 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
6135 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
6136 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
6137 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
6140 /* Accept any base register. SP only in SImode or larger. */
6141 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
6144 /* This is PC relative data before arm_reorg runs. */
6145 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
6146 && GET_CODE (x
) == SYMBOL_REF
6147 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
6150 /* This is PC relative data after arm_reorg runs. */
6151 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
6153 && (GET_CODE (x
) == LABEL_REF
6154 || (GET_CODE (x
) == CONST
6155 && GET_CODE (XEXP (x
, 0)) == PLUS
6156 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6157 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6160 /* Post-inc indexing only supported for SImode and larger. */
6161 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
6162 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
6165 else if (GET_CODE (x
) == PLUS
)
6167 /* REG+REG address can be any two index registers. */
6168 /* We disallow FRAME+REG addressing since we know that FRAME
6169 will be replaced with STACK, and SP relative addressing only
6170 permits SP+OFFSET. */
6171 if (GET_MODE_SIZE (mode
) <= 4
6172 && XEXP (x
, 0) != frame_pointer_rtx
6173 && XEXP (x
, 1) != frame_pointer_rtx
6174 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
6175 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
6176 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
6179 /* REG+const has 5-7 bit offset for non-SP registers. */
6180 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
6181 || XEXP (x
, 0) == arg_pointer_rtx
)
6182 && CONST_INT_P (XEXP (x
, 1))
6183 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6186 /* REG+const has 10-bit offset for SP, but only SImode and
6187 larger is supported. */
6188 /* ??? Should probably check for DI/DFmode overflow here
6189 just like GO_IF_LEGITIMATE_OFFSET does. */
6190 else if (REG_P (XEXP (x
, 0))
6191 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
6192 && GET_MODE_SIZE (mode
) >= 4
6193 && CONST_INT_P (XEXP (x
, 1))
6194 && INTVAL (XEXP (x
, 1)) >= 0
6195 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
6196 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
6199 else if (REG_P (XEXP (x
, 0))
6200 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
6201 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
6202 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
6203 && REGNO (XEXP (x
, 0))
6204 <= LAST_VIRTUAL_POINTER_REGISTER
))
6205 && GET_MODE_SIZE (mode
) >= 4
6206 && CONST_INT_P (XEXP (x
, 1))
6207 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
6211 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6212 && GET_MODE_SIZE (mode
) == 4
6213 && GET_CODE (x
) == SYMBOL_REF
6214 && CONSTANT_POOL_ADDRESS_P (x
)
6216 && symbol_mentioned_p (get_pool_constant (x
))
6217 && ! pcrel_constant_p (get_pool_constant (x
))))
6223 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6224 instruction of mode MODE. */
6226 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
6228 switch (GET_MODE_SIZE (mode
))
6231 return val
>= 0 && val
< 32;
6234 return val
>= 0 && val
< 64 && (val
& 1) == 0;
6238 && (val
+ GET_MODE_SIZE (mode
)) <= 128
6244 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
6247 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
6248 else if (TARGET_THUMB2
)
6249 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
6250 else /* if (TARGET_THUMB1) */
6251 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
6254 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6256 Given an rtx X being reloaded into a reg required to be
6257 in class CLASS, return the class of reg to actually use.
6258 In general this is just CLASS, but for the Thumb core registers and
6259 immediate constants we prefer a LO_REGS class or a subset. */
6262 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
6268 if (rclass
== GENERAL_REGS
6269 || rclass
== HI_REGS
6270 || rclass
== NO_REGS
6271 || rclass
== STACK_REG
)
6278 /* Build the SYMBOL_REF for __tls_get_addr. */
6280 static GTY(()) rtx tls_get_addr_libfunc
;
6283 get_tls_get_addr (void)
6285 if (!tls_get_addr_libfunc
)
6286 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
6287 return tls_get_addr_libfunc
;
6291 arm_load_tp (rtx target
)
6294 target
= gen_reg_rtx (SImode
);
6298 /* Can return in any reg. */
6299 emit_insn (gen_load_tp_hard (target
));
6303 /* Always returned in r0. Immediately copy the result into a pseudo,
6304 otherwise other uses of r0 (e.g. setting up function arguments) may
6305 clobber the value. */
6309 emit_insn (gen_load_tp_soft ());
6311 tmp
= gen_rtx_REG (SImode
, 0);
6312 emit_move_insn (target
, tmp
);
6318 load_tls_operand (rtx x
, rtx reg
)
6322 if (reg
== NULL_RTX
)
6323 reg
= gen_reg_rtx (SImode
);
6325 tmp
= gen_rtx_CONST (SImode
, x
);
6327 emit_move_insn (reg
, tmp
);
6333 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
6335 rtx insns
, label
, labelno
, sum
;
6337 gcc_assert (reloc
!= TLS_DESCSEQ
);
6340 labelno
= GEN_INT (pic_labelno
++);
6341 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6342 label
= gen_rtx_CONST (VOIDmode
, label
);
6344 sum
= gen_rtx_UNSPEC (Pmode
,
6345 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
6346 GEN_INT (TARGET_ARM
? 8 : 4)),
6348 reg
= load_tls_operand (sum
, reg
);
6351 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
6353 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6355 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
6356 LCT_PURE
, /* LCT_CONST? */
6357 Pmode
, 1, reg
, Pmode
);
6359 insns
= get_insns ();
6366 arm_tls_descseq_addr (rtx x
, rtx reg
)
6368 rtx labelno
= GEN_INT (pic_labelno
++);
6369 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6370 rtx sum
= gen_rtx_UNSPEC (Pmode
,
6371 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
6372 gen_rtx_CONST (VOIDmode
, label
),
6373 GEN_INT (!TARGET_ARM
)),
6375 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
6377 emit_insn (gen_tlscall (x
, labelno
));
6379 reg
= gen_reg_rtx (SImode
);
6381 gcc_assert (REGNO (reg
) != 0);
6383 emit_move_insn (reg
, reg0
);
6389 legitimize_tls_address (rtx x
, rtx reg
)
6391 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
6392 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
6396 case TLS_MODEL_GLOBAL_DYNAMIC
:
6397 if (TARGET_GNU2_TLS
)
6399 reg
= arm_tls_descseq_addr (x
, reg
);
6401 tp
= arm_load_tp (NULL_RTX
);
6403 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6407 /* Original scheme */
6408 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
6409 dest
= gen_reg_rtx (Pmode
);
6410 emit_libcall_block (insns
, dest
, ret
, x
);
6414 case TLS_MODEL_LOCAL_DYNAMIC
:
6415 if (TARGET_GNU2_TLS
)
6417 reg
= arm_tls_descseq_addr (x
, reg
);
6419 tp
= arm_load_tp (NULL_RTX
);
6421 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6425 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
6427 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6428 share the LDM result with other LD model accesses. */
6429 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
6431 dest
= gen_reg_rtx (Pmode
);
6432 emit_libcall_block (insns
, dest
, ret
, eqv
);
6434 /* Load the addend. */
6435 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
6436 GEN_INT (TLS_LDO32
)),
6438 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
6439 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
6443 case TLS_MODEL_INITIAL_EXEC
:
6444 labelno
= GEN_INT (pic_labelno
++);
6445 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6446 label
= gen_rtx_CONST (VOIDmode
, label
);
6447 sum
= gen_rtx_UNSPEC (Pmode
,
6448 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
6449 GEN_INT (TARGET_ARM
? 8 : 4)),
6451 reg
= load_tls_operand (sum
, reg
);
6454 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
6455 else if (TARGET_THUMB2
)
6456 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
6459 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6460 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
6463 tp
= arm_load_tp (NULL_RTX
);
6465 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6467 case TLS_MODEL_LOCAL_EXEC
:
6468 tp
= arm_load_tp (NULL_RTX
);
6470 reg
= gen_rtx_UNSPEC (Pmode
,
6471 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
6473 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
6475 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6482 /* Try machine-dependent ways of modifying an illegitimate address
6483 to be legitimate. If we find one, return the new, valid address. */
6485 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6489 /* TODO: legitimize_address for Thumb2. */
6492 return thumb_legitimize_address (x
, orig_x
, mode
);
6495 if (arm_tls_symbol_p (x
))
6496 return legitimize_tls_address (x
, NULL_RTX
);
6498 if (GET_CODE (x
) == PLUS
)
6500 rtx xop0
= XEXP (x
, 0);
6501 rtx xop1
= XEXP (x
, 1);
6503 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
6504 xop0
= force_reg (SImode
, xop0
);
6506 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
6507 xop1
= force_reg (SImode
, xop1
);
6509 if (ARM_BASE_REGISTER_RTX_P (xop0
)
6510 && CONST_INT_P (xop1
))
6512 HOST_WIDE_INT n
, low_n
;
6516 /* VFP addressing modes actually allow greater offsets, but for
6517 now we just stick with the lowest common denominator. */
6519 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
6531 low_n
= ((mode
) == TImode
? 0
6532 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
6536 base_reg
= gen_reg_rtx (SImode
);
6537 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
6538 emit_move_insn (base_reg
, val
);
6539 x
= plus_constant (Pmode
, base_reg
, low_n
);
6541 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6542 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6545 /* XXX We don't allow MINUS any more -- see comment in
6546 arm_legitimate_address_outer_p (). */
6547 else if (GET_CODE (x
) == MINUS
)
6549 rtx xop0
= XEXP (x
, 0);
6550 rtx xop1
= XEXP (x
, 1);
6552 if (CONSTANT_P (xop0
))
6553 xop0
= force_reg (SImode
, xop0
);
6555 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
6556 xop1
= force_reg (SImode
, xop1
);
6558 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6559 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
6562 /* Make sure to take full advantage of the pre-indexed addressing mode
6563 with absolute addresses which often allows for the base register to
6564 be factorized for multiple adjacent memory references, and it might
6565 even allows for the mini pool to be avoided entirely. */
6566 else if (CONST_INT_P (x
) && optimize
> 0)
6569 HOST_WIDE_INT mask
, base
, index
;
6572 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6573 use a 8-bit index. So let's use a 12-bit index for SImode only and
6574 hope that arm_gen_constant will enable ldrb to use more bits. */
6575 bits
= (mode
== SImode
) ? 12 : 8;
6576 mask
= (1 << bits
) - 1;
6577 base
= INTVAL (x
) & ~mask
;
6578 index
= INTVAL (x
) & mask
;
6579 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6581 /* It'll most probably be more efficient to generate the base
6582 with more bits set and use a negative index instead. */
6586 base_reg
= force_reg (SImode
, GEN_INT (base
));
6587 x
= plus_constant (Pmode
, base_reg
, index
);
6592 /* We need to find and carefully transform any SYMBOL and LABEL
6593 references; so go back to the original address expression. */
6594 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6596 if (new_x
!= orig_x
)
6604 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6605 to be legitimate. If we find one, return the new, valid address. */
6607 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6609 if (arm_tls_symbol_p (x
))
6610 return legitimize_tls_address (x
, NULL_RTX
);
6612 if (GET_CODE (x
) == PLUS
6613 && CONST_INT_P (XEXP (x
, 1))
6614 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6615 || INTVAL (XEXP (x
, 1)) < 0))
6617 rtx xop0
= XEXP (x
, 0);
6618 rtx xop1
= XEXP (x
, 1);
6619 HOST_WIDE_INT offset
= INTVAL (xop1
);
6621 /* Try and fold the offset into a biasing of the base register and
6622 then offsetting that. Don't do this when optimizing for space
6623 since it can cause too many CSEs. */
6624 if (optimize_size
&& offset
>= 0
6625 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6627 HOST_WIDE_INT delta
;
6630 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6631 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6632 delta
= 31 * GET_MODE_SIZE (mode
);
6634 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
6636 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
6638 x
= plus_constant (Pmode
, xop0
, delta
);
6640 else if (offset
< 0 && offset
> -256)
6641 /* Small negative offsets are best done with a subtract before the
6642 dereference, forcing these into a register normally takes two
6644 x
= force_operand (x
, NULL_RTX
);
6647 /* For the remaining cases, force the constant into a register. */
6648 xop1
= force_reg (SImode
, xop1
);
6649 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6652 else if (GET_CODE (x
) == PLUS
6653 && s_register_operand (XEXP (x
, 1), SImode
)
6654 && !s_register_operand (XEXP (x
, 0), SImode
))
6656 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6658 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
6663 /* We need to find and carefully transform any SYMBOL and LABEL
6664 references; so go back to the original address expression. */
6665 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6667 if (new_x
!= orig_x
)
6675 arm_legitimize_reload_address (rtx
*p
,
6676 enum machine_mode mode
,
6677 int opnum
, int type
,
6678 int ind_levels ATTRIBUTE_UNUSED
)
6680 /* We must recognize output that we have already generated ourselves. */
6681 if (GET_CODE (*p
) == PLUS
6682 && GET_CODE (XEXP (*p
, 0)) == PLUS
6683 && REG_P (XEXP (XEXP (*p
, 0), 0))
6684 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
6685 && CONST_INT_P (XEXP (*p
, 1)))
6687 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
6688 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
6689 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6693 if (GET_CODE (*p
) == PLUS
6694 && REG_P (XEXP (*p
, 0))
6695 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
6696 /* If the base register is equivalent to a constant, let the generic
6697 code handle it. Otherwise we will run into problems if a future
6698 reload pass decides to rematerialize the constant. */
6699 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
6700 && CONST_INT_P (XEXP (*p
, 1)))
6702 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
6703 HOST_WIDE_INT low
, high
;
6705 /* Detect coprocessor load/stores. */
6706 bool coproc_p
= ((TARGET_HARD_FLOAT
6708 && (mode
== SFmode
|| mode
== DFmode
))
6709 || (TARGET_REALLY_IWMMXT
6710 && VALID_IWMMXT_REG_MODE (mode
))
6712 && (VALID_NEON_DREG_MODE (mode
)
6713 || VALID_NEON_QREG_MODE (mode
))));
6715 /* For some conditions, bail out when lower two bits are unaligned. */
6716 if ((val
& 0x3) != 0
6717 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6719 /* For DI, and DF under soft-float: */
6720 || ((mode
== DImode
|| mode
== DFmode
)
6721 /* Without ldrd, we use stm/ldm, which does not
6722 fair well with unaligned bits. */
6724 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6725 || TARGET_THUMB2
))))
6728 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6729 of which the (reg+high) gets turned into a reload add insn,
6730 we try to decompose the index into high/low values that can often
6731 also lead to better reload CSE.
6733 ldr r0, [r2, #4100] // Offset too large
6734 ldr r1, [r2, #4104] // Offset too large
6736 is best reloaded as:
6742 which post-reload CSE can simplify in most cases to eliminate the
6743 second add instruction:
6748 The idea here is that we want to split out the bits of the constant
6749 as a mask, rather than as subtracting the maximum offset that the
6750 respective type of load/store used can handle.
6752 When encountering negative offsets, we can still utilize it even if
6753 the overall offset is positive; sometimes this may lead to an immediate
6754 that can be constructed with fewer instructions.
6756 ldr r0, [r2, #0x3FFFFC]
6758 This is best reloaded as:
6759 add t1, r2, #0x400000
6762 The trick for spotting this for a load insn with N bits of offset
6763 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
6764 negative offset that is going to make bit N and all the bits below
6765 it become zero in the remainder part.
6767 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6768 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6769 used in most cases of ARM load/store instructions. */
6771 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6772 (((VAL) & ((1 << (N)) - 1)) \
6773 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6778 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
6780 /* NEON quad-word load/stores are made of two double-word accesses,
6781 so the valid index range is reduced by 8. Treat as 9-bit range if
6783 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
6784 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
6786 else if (GET_MODE_SIZE (mode
) == 8)
6789 low
= (TARGET_THUMB2
6790 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
6791 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
6793 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6794 to access doublewords. The supported load/store offsets are
6795 -8, -4, and 4, which we try to produce here. */
6796 low
= ((val
& 0xf) ^ 0x8) - 0x8;
6798 else if (GET_MODE_SIZE (mode
) < 8)
6800 /* NEON element load/stores do not have an offset. */
6801 if (TARGET_NEON_FP16
&& mode
== HFmode
)
6806 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6807 Try the wider 12-bit range first, and re-try if the result
6809 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6811 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6815 if (mode
== HImode
|| mode
== HFmode
)
6818 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6821 /* The storehi/movhi_bytes fallbacks can use only
6822 [-4094,+4094] of the full ldrb/strb index range. */
6823 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6824 if (low
== 4095 || low
== -4095)
6829 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6835 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
6836 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
6837 - (unsigned HOST_WIDE_INT
) 0x80000000);
6838 /* Check for overflow or zero */
6839 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
6842 /* Reload the high part into a base reg; leave the low part
6844 *p
= gen_rtx_PLUS (GET_MODE (*p
),
6845 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
6848 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
6849 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
6850 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6858 thumb_legitimize_reload_address (rtx
*x_p
,
6859 enum machine_mode mode
,
6860 int opnum
, int type
,
6861 int ind_levels ATTRIBUTE_UNUSED
)
6865 if (GET_CODE (x
) == PLUS
6866 && GET_MODE_SIZE (mode
) < 4
6867 && REG_P (XEXP (x
, 0))
6868 && XEXP (x
, 0) == stack_pointer_rtx
6869 && CONST_INT_P (XEXP (x
, 1))
6870 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6875 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6876 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6880 /* If both registers are hi-regs, then it's better to reload the
6881 entire expression rather than each register individually. That
6882 only requires one reload register rather than two. */
6883 if (GET_CODE (x
) == PLUS
6884 && REG_P (XEXP (x
, 0))
6885 && REG_P (XEXP (x
, 1))
6886 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6887 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6892 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6893 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6900 /* Test for various thread-local symbols. */
6902 /* Return TRUE if X is a thread-local symbol. */
6905 arm_tls_symbol_p (rtx x
)
6907 if (! TARGET_HAVE_TLS
)
6910 if (GET_CODE (x
) != SYMBOL_REF
)
6913 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6916 /* Helper for arm_tls_referenced_p. */
6919 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6921 if (GET_CODE (*x
) == SYMBOL_REF
)
6922 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6924 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6925 TLS offsets, not real symbol references. */
6926 if (GET_CODE (*x
) == UNSPEC
6927 && XINT (*x
, 1) == UNSPEC_TLS
)
6933 /* Return TRUE if X contains any TLS symbol references. */
6936 arm_tls_referenced_p (rtx x
)
6938 if (! TARGET_HAVE_TLS
)
6941 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
6944 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6946 On the ARM, allow any integer (invalid ones are removed later by insn
6947 patterns), nice doubles and symbol_refs which refer to the function's
6950 When generating pic allow anything. */
6953 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
6955 /* At present, we have no support for Neon structure constants, so forbid
6956 them here. It might be possible to handle simple cases like 0 and -1
6958 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
6961 return flag_pic
|| !label_mentioned_p (x
);
6965 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6967 return (CONST_INT_P (x
)
6968 || CONST_DOUBLE_P (x
)
6969 || CONSTANT_ADDRESS_P (x
)
6974 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
6976 return (!arm_cannot_force_const_mem (mode
, x
)
6978 ? arm_legitimate_constant_p_1 (mode
, x
)
6979 : thumb_legitimate_constant_p (mode
, x
)));
6982 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6985 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6989 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
6991 split_const (x
, &base
, &offset
);
6992 if (GET_CODE (base
) == SYMBOL_REF
6993 && !offset_within_block_p (base
, INTVAL (offset
)))
6996 return arm_tls_referenced_p (x
);
/* True if X is a register, or a SUBREG of a register.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip a SUBREG wrapper, yielding the underlying register rtx.  */
#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
7007 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7009 enum machine_mode mode
= GET_MODE (x
);
7023 return COSTS_N_INSNS (1);
7026 if (CONST_INT_P (XEXP (x
, 1)))
7029 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7036 return COSTS_N_INSNS (2) + cycles
;
7038 return COSTS_N_INSNS (1) + 16;
7041 return (COSTS_N_INSNS (1)
7042 + 4 * ((MEM_P (SET_SRC (x
)))
7043 + MEM_P (SET_DEST (x
))));
7048 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7050 if (thumb_shiftable_const (INTVAL (x
)))
7051 return COSTS_N_INSNS (2);
7052 return COSTS_N_INSNS (3);
7054 else if ((outer
== PLUS
|| outer
== COMPARE
)
7055 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7057 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7058 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7059 return COSTS_N_INSNS (1);
7060 else if (outer
== AND
)
7063 /* This duplicates the tests in the andsi3 expander. */
7064 for (i
= 9; i
<= 31; i
++)
7065 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7066 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7067 return COSTS_N_INSNS (2);
7069 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7070 || outer
== LSHIFTRT
)
7072 return COSTS_N_INSNS (2);
7078 return COSTS_N_INSNS (3);
7096 /* XXX another guess. */
7097 /* Memory costs quite a lot for the first word, but subsequent words
7098 load at the equivalent of a single insn each. */
7099 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7100 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7105 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7111 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
7112 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
7118 return total
+ COSTS_N_INSNS (1);
7120 /* Assume a two-shift sequence. Increase the cost slightly so
7121 we prefer actual shifts over an extend operation. */
7122 return total
+ 1 + COSTS_N_INSNS (2);
7130 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
7132 enum machine_mode mode
= GET_MODE (x
);
7133 enum rtx_code subcode
;
7135 enum rtx_code code
= GET_CODE (x
);
7141 /* Memory costs quite a lot for the first word, but subsequent words
7142 load at the equivalent of a single insn each. */
7143 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7150 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
7151 *total
= COSTS_N_INSNS (2);
7152 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
7153 *total
= COSTS_N_INSNS (4);
7155 *total
= COSTS_N_INSNS (20);
7159 if (REG_P (XEXP (x
, 1)))
7160 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
7161 else if (!CONST_INT_P (XEXP (x
, 1)))
7162 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7168 *total
+= COSTS_N_INSNS (4);
7173 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
7174 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7177 *total
+= COSTS_N_INSNS (3);
7181 *total
+= COSTS_N_INSNS (1);
7182 /* Increase the cost of complex shifts because they aren't any faster,
7183 and reduce dual issue opportunities. */
7184 if (arm_tune_cortex_a9
7185 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
7193 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7194 if (CONST_INT_P (XEXP (x
, 0))
7195 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
7197 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7201 if (CONST_INT_P (XEXP (x
, 1))
7202 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7204 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7211 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7213 if (TARGET_HARD_FLOAT
7215 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7217 *total
= COSTS_N_INSNS (1);
7218 if (CONST_DOUBLE_P (XEXP (x
, 0))
7219 && arm_const_double_rtx (XEXP (x
, 0)))
7221 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7225 if (CONST_DOUBLE_P (XEXP (x
, 1))
7226 && arm_const_double_rtx (XEXP (x
, 1)))
7228 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7234 *total
= COSTS_N_INSNS (20);
7238 *total
= COSTS_N_INSNS (1);
7239 if (CONST_INT_P (XEXP (x
, 0))
7240 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
7242 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7246 subcode
= GET_CODE (XEXP (x
, 1));
7247 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7248 || subcode
== LSHIFTRT
7249 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7251 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7252 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
7256 /* A shift as a part of RSB costs no more than RSB itself. */
7257 if (GET_CODE (XEXP (x
, 0)) == MULT
7258 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7260 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
7261 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7266 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
7268 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7269 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
7273 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
7274 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
7276 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7277 if (REG_P (XEXP (XEXP (x
, 1), 0))
7278 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
7279 *total
+= COSTS_N_INSNS (1);
7287 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
7288 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7289 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7291 *total
= COSTS_N_INSNS (1);
7292 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
7294 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7298 /* MLA: All arguments must be registers. We filter out
7299 multiplication by a power of two, so that we fall down into
7301 if (GET_CODE (XEXP (x
, 0)) == MULT
7302 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7304 /* The cost comes from the cost of the multiply. */
7308 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7310 if (TARGET_HARD_FLOAT
7312 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7314 *total
= COSTS_N_INSNS (1);
7315 if (CONST_DOUBLE_P (XEXP (x
, 1))
7316 && arm_const_double_rtx (XEXP (x
, 1)))
7318 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7325 *total
= COSTS_N_INSNS (20);
7329 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
7330 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
7332 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7333 if (REG_P (XEXP (XEXP (x
, 0), 0))
7334 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
7335 *total
+= COSTS_N_INSNS (1);
7341 case AND
: case XOR
: case IOR
:
7343 /* Normally the frame registers will be spilt into reg+const during
7344 reload, so it is a bad idea to combine them with other instructions,
7345 since then they might not be moved outside of loops. As a compromise
7346 we allow integration with ops that have a constant as their second
7348 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
7349 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
7350 && !CONST_INT_P (XEXP (x
, 1)))
7351 *total
= COSTS_N_INSNS (1);
7355 *total
+= COSTS_N_INSNS (2);
7356 if (CONST_INT_P (XEXP (x
, 1))
7357 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7359 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7366 *total
+= COSTS_N_INSNS (1);
7367 if (CONST_INT_P (XEXP (x
, 1))
7368 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7370 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7373 subcode
= GET_CODE (XEXP (x
, 0));
7374 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7375 || subcode
== LSHIFTRT
7376 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7378 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7379 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7384 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7386 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7387 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7391 if (subcode
== UMIN
|| subcode
== UMAX
7392 || subcode
== SMIN
|| subcode
== SMAX
)
7394 *total
= COSTS_N_INSNS (3);
7401 /* This should have been handled by the CPU specific routines. */
7405 if (arm_arch3m
&& mode
== SImode
7406 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
7407 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7408 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
7409 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
7410 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
7411 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
7413 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
7416 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7420 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7422 if (TARGET_HARD_FLOAT
7424 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7426 *total
= COSTS_N_INSNS (1);
7429 *total
= COSTS_N_INSNS (2);
7435 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
7436 if (mode
== SImode
&& code
== NOT
)
7438 subcode
= GET_CODE (XEXP (x
, 0));
7439 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7440 || subcode
== LSHIFTRT
7441 || subcode
== ROTATE
|| subcode
== ROTATERT
7443 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
7445 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7446 /* Register shifts cost an extra cycle. */
7447 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
7448 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
7457 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7459 *total
= COSTS_N_INSNS (4);
7463 operand
= XEXP (x
, 0);
7465 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
7466 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
7467 && REG_P (XEXP (operand
, 0))
7468 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
7469 *total
+= COSTS_N_INSNS (1);
7470 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
7471 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
7475 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7477 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7483 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7484 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7486 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7492 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7493 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7495 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7515 /* SCC insns. In the case where the comparison has already been
7516 performed, then they cost 2 instructions. Otherwise they need
7517 an additional comparison before them. */
7518 *total
= COSTS_N_INSNS (2);
7519 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7526 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7532 *total
+= COSTS_N_INSNS (1);
7533 if (CONST_INT_P (XEXP (x
, 1))
7534 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7536 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7540 subcode
= GET_CODE (XEXP (x
, 0));
7541 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7542 || subcode
== LSHIFTRT
7543 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7545 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7546 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7551 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7553 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7554 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7564 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7565 if (!CONST_INT_P (XEXP (x
, 1))
7566 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7567 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7571 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7573 if (TARGET_HARD_FLOAT
7575 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7577 *total
= COSTS_N_INSNS (1);
7580 *total
= COSTS_N_INSNS (20);
7583 *total
= COSTS_N_INSNS (1);
7585 *total
+= COSTS_N_INSNS (3);
7591 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7593 rtx op
= XEXP (x
, 0);
7594 enum machine_mode opmode
= GET_MODE (op
);
7597 *total
+= COSTS_N_INSNS (1);
7599 if (opmode
!= SImode
)
7603 /* If !arm_arch4, we use one of the extendhisi2_mem
7604 or movhi_bytes patterns for HImode. For a QImode
7605 sign extension, we first zero-extend from memory
7606 and then perform a shift sequence. */
7607 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
7608 *total
+= COSTS_N_INSNS (2);
7611 *total
+= COSTS_N_INSNS (1);
7613 /* We don't have the necessary insn, so we need to perform some
7615 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
7616 /* An and with constant 255. */
7617 *total
+= COSTS_N_INSNS (1);
7619 /* A shift sequence. Increase costs slightly to avoid
7620 combining two shifts into an extend operation. */
7621 *total
+= COSTS_N_INSNS (2) + 1;
7627 switch (GET_MODE (XEXP (x
, 0)))
7634 *total
= COSTS_N_INSNS (1);
7644 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7648 if (const_ok_for_arm (INTVAL (x
))
7649 || const_ok_for_arm (~INTVAL (x
)))
7650 *total
= COSTS_N_INSNS (1);
7652 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
7653 INTVAL (x
), NULL_RTX
,
7660 *total
= COSTS_N_INSNS (3);
7664 *total
= COSTS_N_INSNS (1);
7668 *total
= COSTS_N_INSNS (1);
7669 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7673 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
7674 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7675 *total
= COSTS_N_INSNS (1);
7677 *total
= COSTS_N_INSNS (4);
7681 /* The vec_extract patterns accept memory operands that require an
7682 address reload. Account for the cost of that reload to give the
7683 auto-inc-dec pass an incentive to try to replace them. */
7684 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
7685 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
7687 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
7688 if (!neon_vector_mem_operand (SET_DEST (x
), 2))
7689 *total
+= COSTS_N_INSNS (1);
7692 /* Likewise for the vec_set patterns. */
7693 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
7694 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
7695 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
7697 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
7698 *total
= rtx_cost (mem
, code
, 0, speed
);
7699 if (!neon_vector_mem_operand (mem
, 2))
7700 *total
+= COSTS_N_INSNS (1);
7706 /* We cost this as high as our memory costs to allow this to
7707 be hoisted from loops. */
7708 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
7710 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7716 && TARGET_HARD_FLOAT
7718 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
7719 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
7720 *total
= COSTS_N_INSNS (1);
7722 *total
= COSTS_N_INSNS (4);
7726 *total
= COSTS_N_INSNS (4);
7731 /* Estimates the size cost of thumb1 instructions.
7732 For now most of the code is copied from thumb1_rtx_costs. We need more
7733 fine grain tuning when we have more related test cases. */
7735 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7737 enum machine_mode mode
= GET_MODE (x
);
7750 return COSTS_N_INSNS (1);
7753 if (CONST_INT_P (XEXP (x
, 1)))
7755 /* Thumb1 mul instruction can't operate on const. We must Load it
7756 into a register first. */
7757 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
7758 return COSTS_N_INSNS (1) + const_size
;
7760 return COSTS_N_INSNS (1);
7763 return (COSTS_N_INSNS (1)
7764 + 4 * ((MEM_P (SET_SRC (x
)))
7765 + MEM_P (SET_DEST (x
))));
7770 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7771 return COSTS_N_INSNS (1);
7772 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7773 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
7774 return COSTS_N_INSNS (2);
7775 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7776 if (thumb_shiftable_const (INTVAL (x
)))
7777 return COSTS_N_INSNS (2);
7778 return COSTS_N_INSNS (3);
7780 else if ((outer
== PLUS
|| outer
== COMPARE
)
7781 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7783 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7784 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7785 return COSTS_N_INSNS (1);
7786 else if (outer
== AND
)
7789 /* This duplicates the tests in the andsi3 expander. */
7790 for (i
= 9; i
<= 31; i
++)
7791 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7792 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7793 return COSTS_N_INSNS (2);
7795 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7796 || outer
== LSHIFTRT
)
7798 return COSTS_N_INSNS (2);
7804 return COSTS_N_INSNS (3);
7822 /* XXX another guess. */
7823 /* Memory costs quite a lot for the first word, but subsequent words
7824 load at the equivalent of a single insn each. */
7825 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7826 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7831 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7836 /* XXX still guessing. */
7837 switch (GET_MODE (XEXP (x
, 0)))
7840 return (1 + (mode
== DImode
? 4 : 0)
7841 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
7844 return (4 + (mode
== DImode
? 4 : 0)
7845 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
7848 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
7859 /* RTX costs when optimizing for size. */
7861 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7864 enum machine_mode mode
= GET_MODE (x
);
7867 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
7871 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7875 /* A memory access costs 1 insn if the mode is small, or the address is
7876 a single register, otherwise it costs one insn per word. */
7877 if (REG_P (XEXP (x
, 0)))
7878 *total
= COSTS_N_INSNS (1);
7880 && GET_CODE (XEXP (x
, 0)) == PLUS
7881 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
7882 /* This will be split into two instructions.
7883 See arm.md:calculate_pic_address. */
7884 *total
= COSTS_N_INSNS (2);
7886 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7893 /* Needs a libcall, so it costs about this. */
7894 *total
= COSTS_N_INSNS (2);
7898 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
7900 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
7908 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
7910 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
7913 else if (mode
== SImode
)
7915 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
7916 /* Slightly disparage register shifts, but not by much. */
7917 if (!CONST_INT_P (XEXP (x
, 1)))
7918 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
7922 /* Needs a libcall. */
7923 *total
= COSTS_N_INSNS (2);
7927 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7928 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7930 *total
= COSTS_N_INSNS (1);
7936 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
7937 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
7939 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
7940 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
7941 || subcode1
== ROTATE
|| subcode1
== ROTATERT
7942 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
7943 || subcode1
== ASHIFTRT
)
7945 /* It's just the cost of the two operands. */
7950 *total
= COSTS_N_INSNS (1);
7954 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7958 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7959 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7961 *total
= COSTS_N_INSNS (1);
7965 /* A shift as a part of ADD costs nothing. */
7966 if (GET_CODE (XEXP (x
, 0)) == MULT
7967 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7969 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
7970 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
7971 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
7976 case AND
: case XOR
: case IOR
:
7979 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
7981 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
7982 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
7983 || (code
== AND
&& subcode
== NOT
))
7985 /* It's just the cost of the two operands. */
7991 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7995 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7999 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8000 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8002 *total
= COSTS_N_INSNS (1);
8008 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8017 if (cc_register (XEXP (x
, 0), VOIDmode
))
8020 *total
= COSTS_N_INSNS (1);
8024 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8025 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8026 *total
= COSTS_N_INSNS (1);
8028 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
8033 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
8036 if (const_ok_for_arm (INTVAL (x
)))
8037 /* A multiplication by a constant requires another instruction
8038 to load the constant to a register. */
8039 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
8041 else if (const_ok_for_arm (~INTVAL (x
)))
8042 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
8043 else if (const_ok_for_arm (-INTVAL (x
)))
8045 if (outer_code
== COMPARE
|| outer_code
== PLUS
8046 || outer_code
== MINUS
)
8049 *total
= COSTS_N_INSNS (1);
8052 *total
= COSTS_N_INSNS (2);
8058 *total
= COSTS_N_INSNS (2);
8062 *total
= COSTS_N_INSNS (4);
8067 && TARGET_HARD_FLOAT
8068 && outer_code
== SET
8069 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8070 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8071 *total
= COSTS_N_INSNS (1);
8073 *total
= COSTS_N_INSNS (4);
8078 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8079 cost of these slightly. */
8080 *total
= COSTS_N_INSNS (1) + 1;
8087 if (mode
!= VOIDmode
)
8088 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8090 *total
= COSTS_N_INSNS (4); /* How knows? */
8095 /* RTX costs when optimizing for size. */
8097 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
8098 int *total
, bool speed
)
8101 return arm_size_rtx_costs (x
, (enum rtx_code
) code
,
8102 (enum rtx_code
) outer_code
, total
);
8104 return current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
8105 (enum rtx_code
) outer_code
,
8109 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8110 supported on any "slowmul" cores, so it can be ignored. */
8113 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8114 int *total
, bool speed
)
8116 enum machine_mode mode
= GET_MODE (x
);
8120 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8127 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
8130 *total
= COSTS_N_INSNS (20);
8134 if (CONST_INT_P (XEXP (x
, 1)))
8136 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
8137 & (unsigned HOST_WIDE_INT
) 0xffffffff);
8138 int cost
, const_ok
= const_ok_for_arm (i
);
8139 int j
, booth_unit_size
;
8141 /* Tune as appropriate. */
8142 cost
= const_ok
? 4 : 8;
8143 booth_unit_size
= 2;
8144 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
8146 i
>>= booth_unit_size
;
8150 *total
= COSTS_N_INSNS (cost
);
8151 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8155 *total
= COSTS_N_INSNS (20);
8159 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
8164 /* RTX cost for cores with a fast multiply unit (M variants). */
8167 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8168 int *total
, bool speed
)
8170 enum machine_mode mode
= GET_MODE (x
);
8174 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8178 /* ??? should thumb2 use different costs? */
8182 /* There is no point basing this on the tuning, since it is always the
8183 fast variant if it exists at all. */
8185 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
8186 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8187 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8189 *total
= COSTS_N_INSNS(2);
8196 *total
= COSTS_N_INSNS (5);
8200 if (CONST_INT_P (XEXP (x
, 1)))
8202 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
8203 & (unsigned HOST_WIDE_INT
) 0xffffffff);
8204 int cost
, const_ok
= const_ok_for_arm (i
);
8205 int j
, booth_unit_size
;
8207 /* Tune as appropriate. */
8208 cost
= const_ok
? 4 : 8;
8209 booth_unit_size
= 8;
8210 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
8212 i
>>= booth_unit_size
;
8216 *total
= COSTS_N_INSNS(cost
);
8222 *total
= COSTS_N_INSNS (4);
8226 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8228 if (TARGET_HARD_FLOAT
8230 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8232 *total
= COSTS_N_INSNS (1);
8237 /* Requires a lib call */
8238 *total
= COSTS_N_INSNS (20);
8242 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8247 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8248 so it can be ignored. */
8251 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8252 int *total
, bool speed
)
8254 enum machine_mode mode
= GET_MODE (x
);
8258 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8265 if (GET_CODE (XEXP (x
, 0)) != MULT
)
8266 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8268 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8269 will stall until the multiplication is complete. */
8270 *total
= COSTS_N_INSNS (3);
8274 /* There is no point basing this on the tuning, since it is always the
8275 fast variant if it exists at all. */
8277 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
8278 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8279 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8281 *total
= COSTS_N_INSNS (2);
8288 *total
= COSTS_N_INSNS (5);
8292 if (CONST_INT_P (XEXP (x
, 1)))
8294 /* If operand 1 is a constant we can more accurately
8295 calculate the cost of the multiply. The multiplier can
8296 retire 15 bits on the first cycle and a further 12 on the
8297 second. We do, of course, have to load the constant into
8298 a register first. */
8299 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8300 /* There's a general overhead of one cycle. */
8302 unsigned HOST_WIDE_INT masked_const
;
8307 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
8309 masked_const
= i
& 0xffff8000;
8310 if (masked_const
!= 0)
8313 masked_const
= i
& 0xf8000000;
8314 if (masked_const
!= 0)
8317 *total
= COSTS_N_INSNS (cost
);
8323 *total
= COSTS_N_INSNS (3);
8327 /* Requires a lib call */
8328 *total
= COSTS_N_INSNS (20);
8332 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8337 /* RTX costs for 9e (and later) cores. */
8340 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8341 int *total
, bool speed
)
8343 enum machine_mode mode
= GET_MODE (x
);
8350 *total
= COSTS_N_INSNS (3);
8354 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8362 /* There is no point basing this on the tuning, since it is always the
8363 fast variant if it exists at all. */
8365 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
8366 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8367 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8369 *total
= COSTS_N_INSNS (2);
8376 *total
= COSTS_N_INSNS (5);
8382 *total
= COSTS_N_INSNS (2);
8386 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8388 if (TARGET_HARD_FLOAT
8390 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8392 *total
= COSTS_N_INSNS (1);
8397 *total
= COSTS_N_INSNS (20);
8401 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8404 /* All address computations that can be done are free, but rtx cost returns
8405 the same for practically all of them. So we weight the different types
8406 of address here in the order (most pref first):
8407 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8409 arm_arm_address_cost (rtx x
)
8411 enum rtx_code c
= GET_CODE (x
);
8413 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
8415 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
8420 if (CONST_INT_P (XEXP (x
, 1)))
8423 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
8433 arm_thumb_address_cost (rtx x
)
8435 enum rtx_code c
= GET_CODE (x
);
8440 && REG_P (XEXP (x
, 0))
8441 && CONST_INT_P (XEXP (x
, 1)))
8448 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
8449 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
8451 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
8454 /* Adjust cost hook for XScale. */
8456 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8458 /* Some true dependencies can have a higher cost depending
8459 on precisely how certain input operands are used. */
8460 if (REG_NOTE_KIND(link
) == 0
8461 && recog_memoized (insn
) >= 0
8462 && recog_memoized (dep
) >= 0)
8464 int shift_opnum
= get_attr_shift (insn
);
8465 enum attr_type attr_type
= get_attr_type (dep
);
8467 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8468 operand for INSN. If we have a shifted input operand and the
8469 instruction we depend on is another ALU instruction, then we may
8470 have to account for an additional stall. */
8471 if (shift_opnum
!= 0
8472 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
8474 rtx shifted_operand
;
8477 /* Get the shifted operand. */
8478 extract_insn (insn
);
8479 shifted_operand
= recog_data
.operand
[shift_opnum
];
8481 /* Iterate over all the operands in DEP. If we write an operand
8482 that overlaps with SHIFTED_OPERAND, then we have increase the
8483 cost of this dependency. */
8485 preprocess_constraints ();
8486 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
8488 /* We can ignore strict inputs. */
8489 if (recog_data
.operand_type
[opno
] == OP_IN
)
8492 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
8504 /* Adjust cost hook for Cortex A9. */
8506 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8508 switch (REG_NOTE_KIND (link
))
8515 case REG_DEP_OUTPUT
:
8516 if (recog_memoized (insn
) >= 0
8517 && recog_memoized (dep
) >= 0)
8519 if (GET_CODE (PATTERN (insn
)) == SET
)
8522 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
8524 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
8526 enum attr_type attr_type_insn
= get_attr_type (insn
);
8527 enum attr_type attr_type_dep
= get_attr_type (dep
);
8529 /* By default all dependencies of the form
8532 have an extra latency of 1 cycle because
8533 of the input and output dependency in this
8534 case. However this gets modeled as an true
8535 dependency and hence all these checks. */
8536 if (REG_P (SET_DEST (PATTERN (insn
)))
8537 && REG_P (SET_DEST (PATTERN (dep
)))
8538 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
8539 SET_DEST (PATTERN (dep
))))
8541 /* FMACS is a special case where the dependent
8542 instruction can be issued 3 cycles before
8543 the normal latency in case of an output
8545 if ((attr_type_insn
== TYPE_FMACS
8546 || attr_type_insn
== TYPE_FMACD
)
8547 && (attr_type_dep
== TYPE_FMACS
8548 || attr_type_dep
== TYPE_FMACD
))
8550 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8551 *cost
= insn_default_latency (dep
) - 3;
8553 *cost
= insn_default_latency (dep
);
8558 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8559 *cost
= insn_default_latency (dep
) + 1;
8561 *cost
= insn_default_latency (dep
);
8577 /* Adjust cost hook for FA726TE. */
8579 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8581 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
8582 have penalty of 3. */
8583 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
8584 && recog_memoized (insn
) >= 0
8585 && recog_memoized (dep
) >= 0
8586 && get_attr_conds (dep
) == CONDS_SET
)
8588 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8589 if (get_attr_conds (insn
) == CONDS_USE
8590 && get_attr_type (insn
) != TYPE_BRANCH
)
8596 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
8597 || get_attr_conds (insn
) == CONDS_USE
)
8607 /* Implement TARGET_REGISTER_MOVE_COST.
8609 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8610 it is typically more expensive than a single memory access. We set
8611 the cost to less than two memory accesses so that floating
8612 point to integer conversion does not go through memory. */
8615 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
8616 reg_class_t from
, reg_class_t to
)
8620 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
8621 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
8623 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
8624 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
8626 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
8633 if (from
== HI_REGS
|| to
== HI_REGS
)
8640 /* Implement TARGET_MEMORY_MOVE_COST. */
8643 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
8644 bool in ATTRIBUTE_UNUSED
)
8650 if (GET_MODE_SIZE (mode
) < 4)
8653 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
8657 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8658 It corrects the value of COST based on the relationship between
8659 INSN and DEP through the dependence LINK. It returns the new
8660 value. There is a per-core adjust_cost hook to adjust scheduler costs
8661 and the per-core hook can choose to completely override the generic
8662 adjust_cost function. Only put bits of code into arm_adjust_cost that
8663 are common across all cores. */
8665 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
8669 /* When generating Thumb-1 code, we want to place flag-setting operations
8670 close to a conditional branch which depends on them, so that we can
8671 omit the comparison. */
8673 && REG_NOTE_KIND (link
) == 0
8674 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
8675 && recog_memoized (dep
) >= 0
8676 && get_attr_conds (dep
) == CONDS_SET
)
8679 if (current_tune
->sched_adjust_cost
!= NULL
)
8681 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
8685 /* XXX Is this strictly true? */
8686 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8687 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8690 /* Call insns don't incur a stall, even if they follow a load. */
8691 if (REG_NOTE_KIND (link
) == 0
8695 if ((i_pat
= single_set (insn
)) != NULL
8696 && MEM_P (SET_SRC (i_pat
))
8697 && (d_pat
= single_set (dep
)) != NULL
8698 && MEM_P (SET_DEST (d_pat
)))
8700 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
8701 /* This is a load after a store, there is no conflict if the load reads
8702 from a cached area. Assume that loads from the stack, and from the
8703 constant pool are cached, and that others will miss. This is a
8706 if ((GET_CODE (src_mem
) == SYMBOL_REF
8707 && CONSTANT_POOL_ADDRESS_P (src_mem
))
8708 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
8709 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
8710 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
8718 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
8721 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
8723 return (optimize
> 0) ? 2 : 0;
8727 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
8729 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
8732 static bool fp_consts_inited
= false;
8734 static REAL_VALUE_TYPE value_fp0
;
8737 init_fp_table (void)
8741 r
= REAL_VALUE_ATOF ("0", DFmode
);
8743 fp_consts_inited
= true;
8746 /* Return TRUE if rtx X is a valid immediate FP constant. */
8748 arm_const_double_rtx (rtx x
)
8752 if (!fp_consts_inited
)
8755 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8756 if (REAL_VALUE_MINUS_ZERO (r
))
8759 if (REAL_VALUES_EQUAL (r
, value_fp0
))
8765 /* VFPv3 has a fairly wide range of representable immediates, formed from
8766 "quarter-precision" floating-point values. These can be evaluated using this
8767 formula (with ^ for exponentiation):
8771 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8772 16 <= n <= 31 and 0 <= r <= 7.
8774 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8776 - A (most-significant) is the sign bit.
8777 - BCD are the exponent (encoded as r XOR 3).
8778 - EFGH are the mantissa (encoded as n - 16).
8781 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8782 fconst[sd] instruction, or -1 if X isn't suitable. */
8784 vfp3_const_double_index (rtx x
)
8786 REAL_VALUE_TYPE r
, m
;
8788 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
8789 unsigned HOST_WIDE_INT mask
;
8790 HOST_WIDE_INT m1
, m2
;
8791 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8793 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
8796 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8798 /* We can't represent these things, so detect them first. */
8799 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
8802 /* Extract sign, exponent and mantissa. */
8803 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
8804 r
= real_value_abs (&r
);
8805 exponent
= REAL_EXP (&r
);
8806 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8807 highest (sign) bit, with a fixed binary point at bit point_pos.
8808 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8809 bits for the mantissa, this may fail (low bits would be lost). */
8810 real_ldexp (&m
, &r
, point_pos
- exponent
);
8811 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8815 /* If there are bits set in the low part of the mantissa, we can't
8816 represent this value. */
8820 /* Now make it so that mantissa contains the most-significant bits, and move
8821 the point_pos to indicate that the least-significant bits have been
8823 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8826 /* We can permit four significant bits of mantissa only, plus a high bit
8827 which is always 1. */
8828 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8829 if ((mantissa
& mask
) != 0)
8832 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8833 mantissa
>>= point_pos
- 5;
8835 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8836 floating-point immediate zero with Neon using an integer-zero load, but
8837 that case is handled elsewhere.) */
8841 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
8843 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8844 normalized significands are in the range [1, 2). (Our mantissa is shifted
8845 left 4 places at this point relative to normalized IEEE754 values). GCC
8846 internally uses [0.5, 1) (see real.c), so the exponent returned from
8847 REAL_EXP must be altered. */
8848 exponent
= 5 - exponent
;
8850 if (exponent
< 0 || exponent
> 7)
8853 /* Sign, mantissa and exponent are now in the correct form to plug into the
8854 formula described in the comment above. */
8855 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
8858 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8860 vfp3_const_double_rtx (rtx x
)
8865 return vfp3_const_double_index (x
) != -1;
8868 /* Recognize immediates which can be used in various Neon instructions. Legal
8869 immediates are described by the following table (for VMVN variants, the
8870 bitwise inverse of the constant shown is recognized. In either case, VMOV
8871 is output and the correct instruction to use for a given constant is chosen
8872 by the assembler). The constant shown is replicated across all elements of
8873 the destination vector.
8875 insn elems variant constant (binary)
8876 ---- ----- ------- -----------------
8877 vmov i32 0 00000000 00000000 00000000 abcdefgh
8878 vmov i32 1 00000000 00000000 abcdefgh 00000000
8879 vmov i32 2 00000000 abcdefgh 00000000 00000000
8880 vmov i32 3 abcdefgh 00000000 00000000 00000000
8881 vmov i16 4 00000000 abcdefgh
8882 vmov i16 5 abcdefgh 00000000
8883 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8884 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8885 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8886 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8887 vmvn i16 10 00000000 abcdefgh
8888 vmvn i16 11 abcdefgh 00000000
8889 vmov i32 12 00000000 00000000 abcdefgh 11111111
8890 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8891 vmov i32 14 00000000 abcdefgh 11111111 11111111
8892 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8894 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8895 eeeeeeee ffffffff gggggggg hhhhhhhh
8896 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8897 vmov f32 19 00000000 00000000 00000000 00000000
8899 For case 18, B = !b. Representable values are exactly those accepted by
8900 vfp3_const_double_index, but are output as floating-point numbers rather
8903 For case 19, we will change it to vmov.i32 when assembling.
8905 Variants 0-5 (inclusive) may also be used as immediates for the second
8906 operand of VORR/VBIC instructions.
8908 The INVERSE argument causes the bitwise inverse of the given operand to be
8909 recognized instead (used for recognizing legal immediates for the VAND/VORN
8910 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8911 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8912 output, rather than the real insns vbic/vorr).
8914 INVERSE makes no difference to the recognition of float vectors.
8916 The return value is the variant of immediate as shown in the above table, or
8917 -1 if the given value doesn't match any of the listed patterns.
8920 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
8921 rtx
*modconst
, int *elementwidth
)
8923 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8925 for (i = 0; i < idx; i += (STRIDE)) \
8930 immtype = (CLASS); \
8931 elsize = (ELSIZE); \
8935 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
8936 unsigned int innersize
;
8937 unsigned char bytes
[16];
8938 int immtype
= -1, matches
;
8939 unsigned int invmask
= inverse
? 0xff : 0;
8940 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
8944 n_elts
= CONST_VECTOR_NUNITS (op
);
8945 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8950 if (mode
== VOIDmode
)
8952 innersize
= GET_MODE_SIZE (mode
);
8955 /* Vectors of float constants. */
8956 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8958 rtx el0
= CONST_VECTOR_ELT (op
, 0);
8961 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
8964 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
8966 for (i
= 1; i
< n_elts
; i
++)
8968 rtx elt
= CONST_VECTOR_ELT (op
, i
);
8971 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
8973 if (!REAL_VALUES_EQUAL (r0
, re
))
8978 *modconst
= CONST_VECTOR_ELT (op
, 0);
8983 if (el0
== CONST0_RTX (GET_MODE (el0
)))
8989 /* Splat vector constant out into a byte vector. */
8990 for (i
= 0; i
< n_elts
; i
++)
8992 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
8993 unsigned HOST_WIDE_INT elpart
;
8994 unsigned int part
, parts
;
8996 if (CONST_INT_P (el
))
8998 elpart
= INTVAL (el
);
9001 else if (CONST_DOUBLE_P (el
))
9003 elpart
= CONST_DOUBLE_LOW (el
);
9009 for (part
= 0; part
< parts
; part
++)
9012 for (byte
= 0; byte
< innersize
; byte
++)
9014 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
9015 elpart
>>= BITS_PER_UNIT
;
9017 if (CONST_DOUBLE_P (el
))
9018 elpart
= CONST_DOUBLE_HIGH (el
);
9023 gcc_assert (idx
== GET_MODE_SIZE (mode
));
9027 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
9028 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
9030 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
9031 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
9033 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
9034 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
9036 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
9037 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
9039 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
9041 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
9043 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
9044 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
9046 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
9047 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
9049 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
9050 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
9052 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
9053 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
9055 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
9057 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
9059 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
9060 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
9062 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
9063 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
9065 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
9066 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
9068 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
9069 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
9071 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
9073 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
9074 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
9082 *elementwidth
= elsize
;
9086 unsigned HOST_WIDE_INT imm
= 0;
9088 /* Un-invert bytes of recognized vector, if necessary. */
9090 for (i
= 0; i
< idx
; i
++)
9091 bytes
[i
] ^= invmask
;
9095 /* FIXME: Broken on 32-bit H_W_I hosts. */
9096 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
9098 for (i
= 0; i
< 8; i
++)
9099 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
9100 << (i
* BITS_PER_UNIT
);
9102 *modconst
= GEN_INT (imm
);
9106 unsigned HOST_WIDE_INT imm
= 0;
9108 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
9109 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
9111 *modconst
= GEN_INT (imm
);
9119 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9120 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9121 float elements), and a modified constant (whatever should be output for a
9122 VMOV) in *MODCONST. */
9125 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
9126 rtx
*modconst
, int *elementwidth
)
9130 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
9136 *modconst
= tmpconst
;
9139 *elementwidth
= tmpwidth
;
9144 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9145 the immediate is valid, write a constant suitable for using as an operand
9146 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9147 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9150 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
9151 rtx
*modconst
, int *elementwidth
)
9155 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
9157 if (retval
< 0 || retval
> 5)
9161 *modconst
= tmpconst
;
9164 *elementwidth
= tmpwidth
;
9169 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9170 the immediate is valid, write a constant suitable for using as an operand
9171 to VSHR/VSHL to *MODCONST and the corresponding element width to
9172 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
9173 because they have different limitations. */
9176 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
9177 rtx
*modconst
, int *elementwidth
,
9180 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
9181 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
9182 unsigned HOST_WIDE_INT last_elt
= 0;
9183 unsigned HOST_WIDE_INT maxshift
;
9185 /* Split vector constant out into a byte vector. */
9186 for (i
= 0; i
< n_elts
; i
++)
9188 rtx el
= CONST_VECTOR_ELT (op
, i
);
9189 unsigned HOST_WIDE_INT elpart
;
9191 if (CONST_INT_P (el
))
9192 elpart
= INTVAL (el
);
9193 else if (CONST_DOUBLE_P (el
))
9198 if (i
!= 0 && elpart
!= last_elt
)
9204 /* Shift less than element size. */
9205 maxshift
= innersize
* 8;
9209 /* Left shift immediate value can be from 0 to <size>-1. */
9210 if (last_elt
>= maxshift
)
9215 /* Right shift immediate value can be from 1 to <size>. */
9216 if (last_elt
== 0 || last_elt
> maxshift
)
9221 *elementwidth
= innersize
* 8;
9224 *modconst
= CONST_VECTOR_ELT (op
, 0);
9229 /* Return a string suitable for output of Neon immediate logic operation
9233 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
9234 int inverse
, int quad
)
9236 int width
, is_valid
;
9237 static char templ
[40];
9239 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
9241 gcc_assert (is_valid
!= 0);
9244 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
9246 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
9251 /* Return a string suitable for output of Neon immediate shift operation
9252 (VSHR or VSHL) MNEM. */
9255 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
9256 enum machine_mode mode
, int quad
,
9259 int width
, is_valid
;
9260 static char templ
[40];
9262 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
9263 gcc_assert (is_valid
!= 0);
9266 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
9268 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
9273 /* Output a sequence of pairwise operations to implement a reduction.
9274 NOTE: We do "too much work" here, because pairwise operations work on two
9275 registers-worth of operands in one go. Unfortunately we can't exploit those
9276 extra calculations to do the full operation in fewer steps, I don't think.
9277 Although all vector elements of the result but the first are ignored, we
9278 actually calculate the same result in each of the elements. An alternative
9279 such as initially loading a vector with zero to use as each of the second
9280 operands would use up an additional register and take an extra instruction,
9281 for no particular gain. */
9284 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
9285 rtx (*reduc
) (rtx
, rtx
, rtx
))
9287 enum machine_mode inner
= GET_MODE_INNER (mode
);
9288 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
9291 for (i
= parts
/ 2; i
>= 1; i
/= 2)
9293 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
9294 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
9299 /* If VALS is a vector constant that can be loaded into a register
9300 using VDUP, generate instructions to do so and return an RTX to
9301 assign to the register. Otherwise return NULL_RTX. */
9304 neon_vdup_constant (rtx vals
)
9306 enum machine_mode mode
= GET_MODE (vals
);
9307 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
9308 int n_elts
= GET_MODE_NUNITS (mode
);
9309 bool all_same
= true;
9313 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
9316 for (i
= 0; i
< n_elts
; ++i
)
9318 x
= XVECEXP (vals
, 0, i
);
9319 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
9324 /* The elements are not all the same. We could handle repeating
9325 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9326 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9330 /* We can load this constant by using VDUP and a constant in a
9331 single ARM register. This will be cheaper than a vector
9334 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
9335 return gen_rtx_VEC_DUPLICATE (mode
, x
);
9338 /* Generate code to load VALS, which is a PARALLEL containing only
9339 constants (for vec_init) or CONST_VECTOR, efficiently into a
9340 register. Returns an RTX to copy into the register, or NULL_RTX
9341 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9344 neon_make_constant (rtx vals
)
9346 enum machine_mode mode
= GET_MODE (vals
);
9348 rtx const_vec
= NULL_RTX
;
9349 int n_elts
= GET_MODE_NUNITS (mode
);
9353 if (GET_CODE (vals
) == CONST_VECTOR
)
9355 else if (GET_CODE (vals
) == PARALLEL
)
9357 /* A CONST_VECTOR must contain only CONST_INTs and
9358 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9359 Only store valid constants in a CONST_VECTOR. */
9360 for (i
= 0; i
< n_elts
; ++i
)
9362 rtx x
= XVECEXP (vals
, 0, i
);
9363 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
9366 if (n_const
== n_elts
)
9367 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
9372 if (const_vec
!= NULL
9373 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
9374 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9376 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
9377 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9378 pipeline cycle; creating the constant takes one or two ARM
9381 else if (const_vec
!= NULL_RTX
)
9382 /* Load from constant pool. On Cortex-A8 this takes two cycles
9383 (for either double or quad vectors). We can not take advantage
9384 of single-cycle VLD1 because we need a PC-relative addressing
9388 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9389 We can not construct an initializer. */
9393 /* Initialize vector TARGET to VALS. */
9396 neon_expand_vector_init (rtx target
, rtx vals
)
9398 enum machine_mode mode
= GET_MODE (target
);
9399 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
9400 int n_elts
= GET_MODE_NUNITS (mode
);
9401 int n_var
= 0, one_var
= -1;
9402 bool all_same
= true;
9406 for (i
= 0; i
< n_elts
; ++i
)
9408 x
= XVECEXP (vals
, 0, i
);
9409 if (!CONSTANT_P (x
))
9410 ++n_var
, one_var
= i
;
9412 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
9418 rtx constant
= neon_make_constant (vals
);
9419 if (constant
!= NULL_RTX
)
9421 emit_move_insn (target
, constant
);
9426 /* Splat a single non-constant element if we can. */
9427 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
9429 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
9430 emit_insn (gen_rtx_SET (VOIDmode
, target
,
9431 gen_rtx_VEC_DUPLICATE (mode
, x
)));
9435 /* One field is non-constant. Load constant then overwrite varying
9436 field. This is more efficient than using the stack. */
9439 rtx copy
= copy_rtx (vals
);
9440 rtx index
= GEN_INT (one_var
);
9442 /* Load constant part of vector, substitute neighboring value for
9444 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
9445 neon_expand_vector_init (target
, copy
);
9447 /* Insert variable. */
9448 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
9452 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
9455 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
9458 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
9461 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
9464 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
9467 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
9470 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
9473 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
9476 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
9484 /* Construct the vector in memory one field at a time
9485 and load the whole vector. */
9486 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
9487 for (i
= 0; i
< n_elts
; i
++)
9488 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
9489 i
* GET_MODE_SIZE (inner_mode
)),
9490 XVECEXP (vals
, 0, i
));
9491 emit_move_insn (target
, mem
);
9494 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9495 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9496 reported source locations are bogus. */
9499 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
9504 gcc_assert (CONST_INT_P (operand
));
9506 lane
= INTVAL (operand
);
9508 if (lane
< low
|| lane
>= high
)
9512 /* Bounds-check lanes. */
9515 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
9517 bounds_check (operand
, low
, high
, "lane out of range");
9520 /* Bounds-check constants. */
9523 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
9525 bounds_check (operand
, low
, high
, "constant out of range");
9529 neon_element_bits (enum machine_mode mode
)
9532 return GET_MODE_BITSIZE (mode
);
9534 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
9538 /* Predicates for `match_operand' and `match_operator'. */
9540 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9541 WB is true if full writeback address modes are allowed and is false
9542 if limited writeback address modes (POST_INC and PRE_DEC) are
9546 arm_coproc_mem_operand (rtx op
, bool wb
)
9550 /* Reject eliminable registers. */
9551 if (! (reload_in_progress
|| reload_completed
)
9552 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9553 || reg_mentioned_p (arg_pointer_rtx
, op
)
9554 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9555 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9556 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9557 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9560 /* Constants are converted into offsets from labels. */
9566 if (reload_completed
9567 && (GET_CODE (ind
) == LABEL_REF
9568 || (GET_CODE (ind
) == CONST
9569 && GET_CODE (XEXP (ind
, 0)) == PLUS
9570 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9571 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
9574 /* Match: (mem (reg)). */
9576 return arm_address_register_rtx_p (ind
, 0);
9578 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
9579 acceptable in any case (subject to verification by
9580 arm_address_register_rtx_p). We need WB to be true to accept
9581 PRE_INC and POST_DEC. */
9582 if (GET_CODE (ind
) == POST_INC
9583 || GET_CODE (ind
) == PRE_DEC
9585 && (GET_CODE (ind
) == PRE_INC
9586 || GET_CODE (ind
) == POST_DEC
)))
9587 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9590 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
9591 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
9592 && GET_CODE (XEXP (ind
, 1)) == PLUS
9593 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
9594 ind
= XEXP (ind
, 1);
9599 if (GET_CODE (ind
) == PLUS
9600 && REG_P (XEXP (ind
, 0))
9601 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9602 && CONST_INT_P (XEXP (ind
, 1))
9603 && INTVAL (XEXP (ind
, 1)) > -1024
9604 && INTVAL (XEXP (ind
, 1)) < 1024
9605 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
9611 /* Return TRUE if OP is a memory operand which we can load or store a vector
9612 to/from. TYPE is one of the following values:
9613 0 - Vector load/stor (vldr)
9614 1 - Core registers (ldm)
9615 2 - Element/structure loads (vld1)
9618 neon_vector_mem_operand (rtx op
, int type
)
9622 /* Reject eliminable registers. */
9623 if (! (reload_in_progress
|| reload_completed
)
9624 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9625 || reg_mentioned_p (arg_pointer_rtx
, op
)
9626 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9627 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9628 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9629 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9632 /* Constants are converted into offsets from labels. */
9638 if (reload_completed
9639 && (GET_CODE (ind
) == LABEL_REF
9640 || (GET_CODE (ind
) == CONST
9641 && GET_CODE (XEXP (ind
, 0)) == PLUS
9642 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9643 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
9646 /* Match: (mem (reg)). */
9648 return arm_address_register_rtx_p (ind
, 0);
9650 /* Allow post-increment with Neon registers. */
9651 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
9652 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
9653 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9655 /* FIXME: vld1 allows register post-modify. */
9661 && GET_CODE (ind
) == PLUS
9662 && REG_P (XEXP (ind
, 0))
9663 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9664 && CONST_INT_P (XEXP (ind
, 1))
9665 && INTVAL (XEXP (ind
, 1)) > -1024
9666 /* For quad modes, we restrict the constant offset to be slightly less
9667 than what the instruction format permits. We have no such constraint
9668 on double mode offsets. (This must match arm_legitimate_index_p.) */
9669 && (INTVAL (XEXP (ind
, 1))
9670 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
9671 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
9677 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9680 neon_struct_mem_operand (rtx op
)
9684 /* Reject eliminable registers. */
9685 if (! (reload_in_progress
|| reload_completed
)
9686 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9687 || reg_mentioned_p (arg_pointer_rtx
, op
)
9688 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9689 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9690 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9691 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9694 /* Constants are converted into offsets from labels. */
9700 if (reload_completed
9701 && (GET_CODE (ind
) == LABEL_REF
9702 || (GET_CODE (ind
) == CONST
9703 && GET_CODE (XEXP (ind
, 0)) == PLUS
9704 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9705 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
9708 /* Match: (mem (reg)). */
9710 return arm_address_register_rtx_p (ind
, 0);
9712 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9713 if (GET_CODE (ind
) == POST_INC
9714 || GET_CODE (ind
) == PRE_DEC
)
9715 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9720 /* Return true if X is a register that will be eliminated later on. */
9722 arm_eliminable_register (rtx x
)
9724 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
9725 || REGNO (x
) == ARG_POINTER_REGNUM
9726 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
9727 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
9730 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9731 coprocessor registers. Otherwise return NO_REGS. */
9734 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
9738 if (!TARGET_NEON_FP16
)
9739 return GENERAL_REGS
;
9740 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2))
9742 return GENERAL_REGS
;
9745 /* The neon move patterns handle all legitimate vector and struct
9748 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
9749 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
9750 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
9751 || VALID_NEON_STRUCT_MODE (mode
)))
9754 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
9757 return GENERAL_REGS
;
9760 /* Values which must be returned in the most-significant end of the return
9764 arm_return_in_msb (const_tree valtype
)
9766 return (TARGET_AAPCS_BASED
9768 && (AGGREGATE_TYPE_P (valtype
)
9769 || TREE_CODE (valtype
) == COMPLEX_TYPE
9770 || FIXED_POINT_TYPE_P (valtype
)));
9773 /* Return TRUE if X references a SYMBOL_REF. */
9775 symbol_mentioned_p (rtx x
)
9780 if (GET_CODE (x
) == SYMBOL_REF
)
9783 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9784 are constant offsets, not symbols. */
9785 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9788 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9790 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9796 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9797 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
9800 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
9807 /* Return TRUE if X references a LABEL_REF. */
9809 label_mentioned_p (rtx x
)
9814 if (GET_CODE (x
) == LABEL_REF
)
9817 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9818 instruction, but they are constant offsets, not symbols. */
9819 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9822 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9823 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9829 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9830 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
9833 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
9841 tls_mentioned_p (rtx x
)
9843 switch (GET_CODE (x
))
9846 return tls_mentioned_p (XEXP (x
, 0));
9849 if (XINT (x
, 1) == UNSPEC_TLS
)
9857 /* Must not copy any rtx that uses a pc-relative address. */
9860 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
9862 if (GET_CODE (*x
) == UNSPEC
9863 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
9864 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
9870 arm_cannot_copy_insn_p (rtx insn
)
9872 /* The tls call insn cannot be copied, as it is paired with a data
9874 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
9877 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
9883 enum rtx_code code
= GET_CODE (x
);
9900 /* Match pair of min/max operators that can be implemented via usat/ssat. */
9903 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
9904 int *mask
, bool *signed_sat
)
9906 /* The high bound must be a power of two minus one. */
9907 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
9911 /* The low bound is either zero (for usat) or one less than the
9912 negation of the high bound (for ssat). */
9913 if (INTVAL (lo_bound
) == 0)
9918 *signed_sat
= false;
9923 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
9936 /* Return 1 if memory locations are adjacent. */
9938 adjacent_mem_locations (rtx a
, rtx b
)
9940 /* We don't guarantee to preserve the order of these memory refs. */
9941 if (volatile_refs_p (a
) || volatile_refs_p (b
))
9944 if ((REG_P (XEXP (a
, 0))
9945 || (GET_CODE (XEXP (a
, 0)) == PLUS
9946 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
9947 && (REG_P (XEXP (b
, 0))
9948 || (GET_CODE (XEXP (b
, 0)) == PLUS
9949 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
9951 HOST_WIDE_INT val0
= 0, val1
= 0;
9955 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
9957 reg0
= XEXP (XEXP (a
, 0), 0);
9958 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
9963 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
9965 reg1
= XEXP (XEXP (b
, 0), 0);
9966 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
9971 /* Don't accept any offset that will require multiple
9972 instructions to handle, since this would cause the
9973 arith_adjacentmem pattern to output an overlong sequence. */
9974 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
9977 /* Don't allow an eliminable register: register elimination can make
9978 the offset too large. */
9979 if (arm_eliminable_register (reg0
))
9982 val_diff
= val1
- val0
;
9986 /* If the target has load delay slots, then there's no benefit
9987 to using an ldm instruction unless the offset is zero and
9988 we are optimizing for size. */
9989 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
9990 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
9991 && (val_diff
== 4 || val_diff
== -4));
9994 return ((REGNO (reg0
) == REGNO (reg1
))
9995 && (val_diff
== 4 || val_diff
== -4));
10001 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10002 for load operations, false for store operations. CONSECUTIVE is true
10003 if the register numbers in the operation must be consecutive in the register
10004 bank. RETURN_PC is true if value is to be loaded in PC.
10005 The pattern we are trying to match for load is:
10006 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10007 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10010 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10013 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10014 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10015 3. If consecutive is TRUE, then for kth register being loaded,
10016 REGNO (R_dk) = REGNO (R_d0) + k.
10017 The pattern for store is similar. */
10019 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
10020 bool consecutive
, bool return_pc
)
10022 HOST_WIDE_INT count
= XVECLEN (op
, 0);
10023 rtx reg
, mem
, addr
;
10025 unsigned first_regno
;
10026 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
10028 bool addr_reg_in_reglist
= false;
10029 bool update
= false;
10034 /* If not in SImode, then registers must be consecutive
10035 (e.g., VLDM instructions for DFmode). */
10036 gcc_assert ((mode
== SImode
) || consecutive
);
10037 /* Setting return_pc for stores is illegal. */
10038 gcc_assert (!return_pc
|| load
);
10040 /* Set up the increments and the regs per val based on the mode. */
10041 reg_increment
= GET_MODE_SIZE (mode
);
10042 regs_per_val
= reg_increment
/ 4;
10043 offset_adj
= return_pc
? 1 : 0;
10046 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
10047 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
10050 /* Check if this is a write-back. */
10051 elt
= XVECEXP (op
, 0, offset_adj
);
10052 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
10058 /* The offset adjustment must be the number of registers being
10059 popped times the size of a single register. */
10060 if (!REG_P (SET_DEST (elt
))
10061 || !REG_P (XEXP (SET_SRC (elt
), 0))
10062 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
10063 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
10064 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
10065 ((count
- 1 - offset_adj
) * reg_increment
))
10069 i
= i
+ offset_adj
;
10070 base
= base
+ offset_adj
;
10071 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10072 success depends on the type: VLDM can do just one reg,
10073 LDM must do at least two. */
10074 if ((count
<= i
) && (mode
== SImode
))
10077 elt
= XVECEXP (op
, 0, i
- 1);
10078 if (GET_CODE (elt
) != SET
)
10083 reg
= SET_DEST (elt
);
10084 mem
= SET_SRC (elt
);
10088 reg
= SET_SRC (elt
);
10089 mem
= SET_DEST (elt
);
10092 if (!REG_P (reg
) || !MEM_P (mem
))
10095 regno
= REGNO (reg
);
10096 first_regno
= regno
;
10097 addr
= XEXP (mem
, 0);
10098 if (GET_CODE (addr
) == PLUS
)
10100 if (!CONST_INT_P (XEXP (addr
, 1)))
10103 offset
= INTVAL (XEXP (addr
, 1));
10104 addr
= XEXP (addr
, 0);
10110 /* Don't allow SP to be loaded unless it is also the base register. It
10111 guarantees that SP is reset correctly when an LDM instruction
10112 is interruptted. Otherwise, we might end up with a corrupt stack. */
10113 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
10116 for (; i
< count
; i
++)
10118 elt
= XVECEXP (op
, 0, i
);
10119 if (GET_CODE (elt
) != SET
)
10124 reg
= SET_DEST (elt
);
10125 mem
= SET_SRC (elt
);
10129 reg
= SET_SRC (elt
);
10130 mem
= SET_DEST (elt
);
10134 || GET_MODE (reg
) != mode
10135 || REGNO (reg
) <= regno
10138 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
10139 /* Don't allow SP to be loaded unless it is also the base register. It
10140 guarantees that SP is reset correctly when an LDM instruction
10141 is interrupted. Otherwise, we might end up with a corrupt stack. */
10142 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
10144 || GET_MODE (mem
) != mode
10145 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
10146 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
10147 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
10148 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
10149 offset
+ (i
- base
) * reg_increment
))
10150 && (!REG_P (XEXP (mem
, 0))
10151 || offset
+ (i
- base
) * reg_increment
!= 0)))
10154 regno
= REGNO (reg
);
10155 if (regno
== REGNO (addr
))
10156 addr_reg_in_reglist
= true;
10161 if (update
&& addr_reg_in_reglist
)
10164 /* For Thumb-1, address register is always modified - either by write-back
10165 or by explicit load. If the pattern does not describe an update,
10166 then the address register must be in the list of loaded registers. */
10168 return update
|| addr_reg_in_reglist
;
10174 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10175 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10176 instruction. ADD_OFFSET is nonzero if the base address register needs
10177 to be modified with an add instruction before we can use it. */
10180 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
10181 int nops
, HOST_WIDE_INT add_offset
)
10183 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10184 if the offset isn't small enough. The reason 2 ldrs are faster
10185 is because these ARMs are able to do more than one cache access
10186 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10187 whilst the ARM8 has a double bandwidth cache. This means that
10188 these cores can do both an instruction fetch and a data fetch in
10189 a single cycle, so the trick of calculating the address into a
10190 scratch register (one of the result regs) and then doing a load
10191 multiple actually becomes slower (and no smaller in code size).
10192 That is the transformation
10194 ldr rd1, [rbase + offset]
10195 ldr rd2, [rbase + offset + 4]
10199 add rd1, rbase, offset
10200 ldmia rd1, {rd1, rd2}
10202 produces worse code -- '3 cycles + any stalls on rd2' instead of
10203 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10204 access per cycle, the first sequence could never complete in less
10205 than 6 cycles, whereas the ldm sequence would only take 5 and
10206 would make better use of sequential accesses if not hitting the
10209 We cheat here and test 'arm_ld_sched' which we currently know to
10210 only be true for the ARM8, ARM9 and StrongARM. If this ever
10211 changes, then the test below needs to be reworked. */
10212 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
10215 /* XScale has load-store double instructions, but they have stricter
10216 alignment requirements than load-store multiple, so we cannot
10219 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10220 the pipeline until completion.
10228 An ldr instruction takes 1-3 cycles, but does not block the
10237 Best case ldr will always win. However, the more ldr instructions
10238 we issue, the less likely we are to be able to schedule them well.
10239 Using ldr instructions also increases code size.
10241 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10242 for counts of 3 or 4 regs. */
10243 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
10248 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10249 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10250 an array ORDER which describes the sequence to use when accessing the
10251 offsets that produces an ascending order. In this sequence, each
10252 offset must be larger by exactly 4 than the previous one. ORDER[0]
10253 must have been filled in with the lowest offset by the caller.
10254 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10255 we use to verify that ORDER produces an ascending order of registers.
10256 Return true if it was possible to construct such an order, false if
10260 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
10261 int *unsorted_regs
)
10264 for (i
= 1; i
< nops
; i
++)
10268 order
[i
] = order
[i
- 1];
10269 for (j
= 0; j
< nops
; j
++)
10270 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
10272 /* We must find exactly one offset that is higher than the
10273 previous one by 4. */
10274 if (order
[i
] != order
[i
- 1])
10278 if (order
[i
] == order
[i
- 1])
10280 /* The register numbers must be ascending. */
10281 if (unsorted_regs
!= NULL
10282 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
10288 /* Used to determine in a peephole whether a sequence of load
10289 instructions can be changed into a load-multiple instruction.
10290 NOPS is the number of separate load instructions we are examining. The
10291 first NOPS entries in OPERANDS are the destination registers, the
10292 next NOPS entries are memory operands. If this function is
10293 successful, *BASE is set to the common base register of the memory
10294 accesses; *LOAD_OFFSET is set to the first memory location's offset
10295 from that base register.
10296 REGS is an array filled in with the destination register numbers.
10297 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10298 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10299 the sequence of registers in REGS matches the loads from ascending memory
10300 locations, and the function verifies that the register numbers are
10301 themselves ascending. If CHECK_REGS is false, the register numbers
10302 are stored in the order they are found in the operands. */
10304 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
10305 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
10307 int unsorted_regs
[MAX_LDM_STM_OPS
];
10308 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
10309 int order
[MAX_LDM_STM_OPS
];
10310 rtx base_reg_rtx
= NULL
;
10314 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10315 easily extended if required. */
10316 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
10318 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
10320 /* Loop over the operands and check that the memory references are
10321 suitable (i.e. immediate offsets from the same base register). At
10322 the same time, extract the target register, and the memory
10324 for (i
= 0; i
< nops
; i
++)
10329 /* Convert a subreg of a mem into the mem itself. */
10330 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
10331 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
10333 gcc_assert (MEM_P (operands
[nops
+ i
]));
10335 /* Don't reorder volatile memory references; it doesn't seem worth
10336 looking for the case where the order is ok anyway. */
10337 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
10340 offset
= const0_rtx
;
10342 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
10343 || (GET_CODE (reg
) == SUBREG
10344 && REG_P (reg
= SUBREG_REG (reg
))))
10345 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
10346 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
10347 || (GET_CODE (reg
) == SUBREG
10348 && REG_P (reg
= SUBREG_REG (reg
))))
10349 && (CONST_INT_P (offset
10350 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
10354 base_reg
= REGNO (reg
);
10355 base_reg_rtx
= reg
;
10356 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
10359 else if (base_reg
!= (int) REGNO (reg
))
10360 /* Not addressed from the same base register. */
10363 unsorted_regs
[i
] = (REG_P (operands
[i
])
10364 ? REGNO (operands
[i
])
10365 : REGNO (SUBREG_REG (operands
[i
])));
10367 /* If it isn't an integer register, or if it overwrites the
10368 base register but isn't the last insn in the list, then
10369 we can't do this. */
10370 if (unsorted_regs
[i
] < 0
10371 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
10372 || unsorted_regs
[i
] > 14
10373 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
10376 unsorted_offsets
[i
] = INTVAL (offset
);
10377 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
10381 /* Not a suitable memory address. */
10385 /* All the useful information has now been extracted from the
10386 operands into unsorted_regs and unsorted_offsets; additionally,
10387 order[0] has been set to the lowest offset in the list. Sort
10388 the offsets into order, verifying that they are adjacent, and
10389 check that the register numbers are ascending. */
10390 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
10391 check_regs
? unsorted_regs
: NULL
))
10395 memcpy (saved_order
, order
, sizeof order
);
10401 for (i
= 0; i
< nops
; i
++)
10402 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
10404 *load_offset
= unsorted_offsets
[order
[0]];
10408 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
10411 if (unsorted_offsets
[order
[0]] == 0)
10412 ldm_case
= 1; /* ldmia */
10413 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
10414 ldm_case
= 2; /* ldmib */
10415 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
10416 ldm_case
= 3; /* ldmda */
10417 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
10418 ldm_case
= 4; /* ldmdb */
10419 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
10420 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
10425 if (!multiple_operation_profitable_p (false, nops
,
10427 ? unsorted_offsets
[order
[0]] : 0))
10433 /* Used to determine in a peephole whether a sequence of store instructions can
10434 be changed into a store-multiple instruction.
10435 NOPS is the number of separate store instructions we are examining.
10436 NOPS_TOTAL is the total number of instructions recognized by the peephole
10438 The first NOPS entries in OPERANDS are the source registers, the next
10439 NOPS entries are memory operands. If this function is successful, *BASE is
10440 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10441 to the first memory location's offset from that base register. REGS is an
10442 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10443 likewise filled with the corresponding rtx's.
10444 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10445 numbers to an ascending order of stores.
10446 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10447 from ascending memory locations, and the function verifies that the register
10448 numbers are themselves ascending. If CHECK_REGS is false, the register
10449 numbers are stored in the order they are found in the operands. */
10451 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
10452 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
10453 HOST_WIDE_INT
*load_offset
, bool check_regs
)
10455 int unsorted_regs
[MAX_LDM_STM_OPS
];
10456 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
10457 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
10458 int order
[MAX_LDM_STM_OPS
];
10460 rtx base_reg_rtx
= NULL
;
10463 /* Write back of base register is currently only supported for Thumb 1. */
10464 int base_writeback
= TARGET_THUMB1
;
10466 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10467 easily extended if required. */
10468 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
10470 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
10472 /* Loop over the operands and check that the memory references are
10473 suitable (i.e. immediate offsets from the same base register). At
10474 the same time, extract the target register, and the memory
10476 for (i
= 0; i
< nops
; i
++)
10481 /* Convert a subreg of a mem into the mem itself. */
10482 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
10483 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
10485 gcc_assert (MEM_P (operands
[nops
+ i
]));
10487 /* Don't reorder volatile memory references; it doesn't seem worth
10488 looking for the case where the order is ok anyway. */
10489 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
10492 offset
= const0_rtx
;
10494 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
10495 || (GET_CODE (reg
) == SUBREG
10496 && REG_P (reg
= SUBREG_REG (reg
))))
10497 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
10498 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
10499 || (GET_CODE (reg
) == SUBREG
10500 && REG_P (reg
= SUBREG_REG (reg
))))
10501 && (CONST_INT_P (offset
10502 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
10504 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
10505 ? operands
[i
] : SUBREG_REG (operands
[i
]));
10506 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
10510 base_reg
= REGNO (reg
);
10511 base_reg_rtx
= reg
;
10512 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
10515 else if (base_reg
!= (int) REGNO (reg
))
10516 /* Not addressed from the same base register. */
10519 /* If it isn't an integer register, then we can't do this. */
10520 if (unsorted_regs
[i
] < 0
10521 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
10522 /* The effects are unpredictable if the base register is
10523 both updated and stored. */
10524 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
10525 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
10526 || unsorted_regs
[i
] > 14)
10529 unsorted_offsets
[i
] = INTVAL (offset
);
10530 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
10534 /* Not a suitable memory address. */
10538 /* All the useful information has now been extracted from the
10539 operands into unsorted_regs and unsorted_offsets; additionally,
10540 order[0] has been set to the lowest offset in the list. Sort
10541 the offsets into order, verifying that they are adjacent, and
10542 check that the register numbers are ascending. */
10543 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
10544 check_regs
? unsorted_regs
: NULL
))
10548 memcpy (saved_order
, order
, sizeof order
);
10554 for (i
= 0; i
< nops
; i
++)
10556 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
10558 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
10561 *load_offset
= unsorted_offsets
[order
[0]];
10565 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
10568 if (unsorted_offsets
[order
[0]] == 0)
10569 stm_case
= 1; /* stmia */
10570 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
10571 stm_case
= 2; /* stmib */
10572 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
10573 stm_case
= 3; /* stmda */
10574 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
10575 stm_case
= 4; /* stmdb */
10579 if (!multiple_operation_profitable_p (false, nops
, 0))
10585 /* Routines for use in generating RTL. */
10587 /* Generate a load-multiple instruction. COUNT is the number of loads in
10588 the instruction; REGS and MEMS are arrays containing the operands.
10589 BASEREG is the base register to be used in addressing the memory operands.
10590 WBACK_OFFSET is nonzero if the instruction should update the base
10594 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
10595 HOST_WIDE_INT wback_offset
)
10600 if (!multiple_operation_profitable_p (false, count
, 0))
10606 for (i
= 0; i
< count
; i
++)
10607 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
10609 if (wback_offset
!= 0)
10610 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
10612 seq
= get_insns ();
10618 result
= gen_rtx_PARALLEL (VOIDmode
,
10619 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
10620 if (wback_offset
!= 0)
10622 XVECEXP (result
, 0, 0)
10623 = gen_rtx_SET (VOIDmode
, basereg
,
10624 plus_constant (Pmode
, basereg
, wback_offset
));
10629 for (j
= 0; i
< count
; i
++, j
++)
10630 XVECEXP (result
, 0, i
)
10631 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
10636 /* Generate a store-multiple instruction. COUNT is the number of stores in
10637 the instruction; REGS and MEMS are arrays containing the operands.
10638 BASEREG is the base register to be used in addressing the memory operands.
10639 WBACK_OFFSET is nonzero if the instruction should update the base
10643 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
10644 HOST_WIDE_INT wback_offset
)
10649 if (GET_CODE (basereg
) == PLUS
)
10650 basereg
= XEXP (basereg
, 0);
10652 if (!multiple_operation_profitable_p (false, count
, 0))
10658 for (i
= 0; i
< count
; i
++)
10659 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
10661 if (wback_offset
!= 0)
10662 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
10664 seq
= get_insns ();
10670 result
= gen_rtx_PARALLEL (VOIDmode
,
10671 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
10672 if (wback_offset
!= 0)
10674 XVECEXP (result
, 0, 0)
10675 = gen_rtx_SET (VOIDmode
, basereg
,
10676 plus_constant (Pmode
, basereg
, wback_offset
));
10681 for (j
= 0; i
< count
; i
++, j
++)
10682 XVECEXP (result
, 0, i
)
10683 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
10688 /* Generate either a load-multiple or a store-multiple instruction. This
10689 function can be used in situations where we can start with a single MEM
10690 rtx and adjust its address upwards.
10691 COUNT is the number of operations in the instruction, not counting a
10692 possible update of the base register. REGS is an array containing the
10694 BASEREG is the base register to be used in addressing the memory operands,
10695 which are constructed from BASEMEM.
10696 WRITE_BACK specifies whether the generated instruction should include an
10697 update of the base register.
10698 OFFSETP is used to pass an offset to and from this function; this offset
10699 is not used when constructing the address (instead BASEMEM should have an
10700 appropriate offset in its address), it is used only for setting
10701 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
10704 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
10705 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
10707 rtx mems
[MAX_LDM_STM_OPS
];
10708 HOST_WIDE_INT offset
= *offsetp
;
10711 gcc_assert (count
<= MAX_LDM_STM_OPS
);
10713 if (GET_CODE (basereg
) == PLUS
)
10714 basereg
= XEXP (basereg
, 0);
10716 for (i
= 0; i
< count
; i
++)
10718 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
10719 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
10727 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
10728 write_back
? 4 * count
: 0);
10730 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
10731 write_back
? 4 * count
: 0);
10735 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10736 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10738 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
10743 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10744 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10746 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
10750 /* Called from a peephole2 expander to turn a sequence of loads into an
10751 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10752 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10753 is true if we can reorder the registers because they are used commutatively
10755 Returns true iff we could generate a new instruction. */
10758 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
10760 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10761 rtx mems
[MAX_LDM_STM_OPS
];
10762 int i
, j
, base_reg
;
10764 HOST_WIDE_INT offset
;
10765 int write_back
= FALSE
;
10769 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
10770 &base_reg
, &offset
, !sort_regs
);
10776 for (i
= 0; i
< nops
- 1; i
++)
10777 for (j
= i
+ 1; j
< nops
; j
++)
10778 if (regs
[i
] > regs
[j
])
10784 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10788 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
10789 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
10795 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
10796 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
10798 if (!TARGET_THUMB1
)
10800 base_reg
= regs
[0];
10801 base_reg_rtx
= newbase
;
10805 for (i
= 0; i
< nops
; i
++)
10807 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
10808 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10811 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10812 write_back
? offset
+ i
* 4 : 0));
10816 /* Called from a peephole2 expander to turn a sequence of stores into an
10817 STM instruction. OPERANDS are the operands found by the peephole matcher;
10818 NOPS indicates how many separate stores we are trying to combine.
10819 Returns true iff we could generate a new instruction. */
10822 gen_stm_seq (rtx
*operands
, int nops
)
10825 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10826 rtx mems
[MAX_LDM_STM_OPS
];
10829 HOST_WIDE_INT offset
;
10830 int write_back
= FALSE
;
10833 bool base_reg_dies
;
10835 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
10836 mem_order
, &base_reg
, &offset
, true);
10841 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10843 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
10846 gcc_assert (base_reg_dies
);
10852 gcc_assert (base_reg_dies
);
10853 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10857 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
10859 for (i
= 0; i
< nops
; i
++)
10861 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
10862 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10865 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10866 write_back
? offset
+ i
* 4 : 0));
10870 /* Called from a peephole2 expander to turn a sequence of stores that are
10871 preceded by constant loads into an STM instruction. OPERANDS are the
10872 operands found by the peephole matcher; NOPS indicates how many
10873 separate stores we are trying to combine; there are 2 * NOPS
10874 instructions in the peephole.
10875 Returns true iff we could generate a new instruction. */
10878 gen_const_stm_seq (rtx
*operands
, int nops
)
10880 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
10881 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10882 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
10883 rtx mems
[MAX_LDM_STM_OPS
];
10886 HOST_WIDE_INT offset
;
10887 int write_back
= FALSE
;
10890 bool base_reg_dies
;
10892 HARD_REG_SET allocated
;
10894 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
10895 mem_order
, &base_reg
, &offset
, false);
10900 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
10902 /* If the same register is used more than once, try to find a free
10904 CLEAR_HARD_REG_SET (allocated
);
10905 for (i
= 0; i
< nops
; i
++)
10907 for (j
= i
+ 1; j
< nops
; j
++)
10908 if (regs
[i
] == regs
[j
])
10910 rtx t
= peep2_find_free_register (0, nops
* 2,
10911 TARGET_THUMB1
? "l" : "r",
10912 SImode
, &allocated
);
10916 regs
[i
] = REGNO (t
);
10920 /* Compute an ordering that maps the register numbers to an ascending
10923 for (i
= 0; i
< nops
; i
++)
10924 if (regs
[i
] < regs
[reg_order
[0]])
10927 for (i
= 1; i
< nops
; i
++)
10929 int this_order
= reg_order
[i
- 1];
10930 for (j
= 0; j
< nops
; j
++)
10931 if (regs
[j
] > regs
[reg_order
[i
- 1]]
10932 && (this_order
== reg_order
[i
- 1]
10933 || regs
[j
] < regs
[this_order
]))
10935 reg_order
[i
] = this_order
;
10938 /* Ensure that registers that must be live after the instruction end
10939 up with the correct value. */
10940 for (i
= 0; i
< nops
; i
++)
10942 int this_order
= reg_order
[i
];
10943 if ((this_order
!= mem_order
[i
]
10944 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
10945 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
10949 /* Load the constants. */
10950 for (i
= 0; i
< nops
; i
++)
10952 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
10953 sorted_regs
[i
] = regs
[reg_order
[i
]];
10954 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
10957 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10959 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
10962 gcc_assert (base_reg_dies
);
10968 gcc_assert (base_reg_dies
);
10969 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10973 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
10975 for (i
= 0; i
< nops
; i
++)
10977 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
10978 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10981 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
10982 write_back
? offset
+ i
* 4 : 0));
10986 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10987 unaligned copies on processors which support unaligned semantics for those
10988 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10989 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10990 An interleave factor of 1 (the minimum) will perform no interleaving.
10991 Load/store multiple are used for aligned addresses where possible. */
10994 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
10995 HOST_WIDE_INT length
,
10996 unsigned int interleave_factor
)
10998 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
10999 int *regnos
= XALLOCAVEC (int, interleave_factor
);
11000 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
11001 HOST_WIDE_INT i
, j
;
11002 HOST_WIDE_INT remaining
= length
, words
;
11003 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
11005 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
11006 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
11007 HOST_WIDE_INT srcoffset
, dstoffset
;
11008 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
11011 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
11013 /* Use hard registers if we have aligned source or destination so we can use
11014 load/store multiple with contiguous registers. */
11015 if (dst_aligned
|| src_aligned
)
11016 for (i
= 0; i
< interleave_factor
; i
++)
11017 regs
[i
] = gen_rtx_REG (SImode
, i
);
11019 for (i
= 0; i
< interleave_factor
; i
++)
11020 regs
[i
] = gen_reg_rtx (SImode
);
11022 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
11023 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
11025 srcoffset
= dstoffset
= 0;
11027 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11028 For copying the last bytes we want to subtract this offset again. */
11029 src_autoinc
= dst_autoinc
= 0;
11031 for (i
= 0; i
< interleave_factor
; i
++)
11034 /* Copy BLOCK_SIZE_BYTES chunks. */
11036 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
11039 if (src_aligned
&& interleave_factor
> 1)
11041 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
11042 TRUE
, srcbase
, &srcoffset
));
11043 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
11047 for (j
= 0; j
< interleave_factor
; j
++)
11049 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
11051 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
11052 srcoffset
+ j
* UNITS_PER_WORD
);
11053 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
11055 srcoffset
+= block_size_bytes
;
11059 if (dst_aligned
&& interleave_factor
> 1)
11061 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
11062 TRUE
, dstbase
, &dstoffset
));
11063 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
11067 for (j
= 0; j
< interleave_factor
; j
++)
11069 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
11071 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
11072 dstoffset
+ j
* UNITS_PER_WORD
);
11073 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
11075 dstoffset
+= block_size_bytes
;
11078 remaining
-= block_size_bytes
;
11081 /* Copy any whole words left (note these aren't interleaved with any
11082 subsequent halfword/byte load/stores in the interests of simplicity). */
11084 words
= remaining
/ UNITS_PER_WORD
;
11086 gcc_assert (words
< interleave_factor
);
11088 if (src_aligned
&& words
> 1)
11090 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
11092 src_autoinc
+= UNITS_PER_WORD
* words
;
11096 for (j
= 0; j
< words
; j
++)
11098 addr
= plus_constant (Pmode
, src
,
11099 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
11100 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
11101 srcoffset
+ j
* UNITS_PER_WORD
);
11102 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
11104 srcoffset
+= words
* UNITS_PER_WORD
;
11107 if (dst_aligned
&& words
> 1)
11109 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
11111 dst_autoinc
+= words
* UNITS_PER_WORD
;
11115 for (j
= 0; j
< words
; j
++)
11117 addr
= plus_constant (Pmode
, dst
,
11118 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
11119 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
11120 dstoffset
+ j
* UNITS_PER_WORD
);
11121 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
11123 dstoffset
+= words
* UNITS_PER_WORD
;
11126 remaining
-= words
* UNITS_PER_WORD
;
11128 gcc_assert (remaining
< 4);
11130 /* Copy a halfword if necessary. */
11132 if (remaining
>= 2)
11134 halfword_tmp
= gen_reg_rtx (SImode
);
11136 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
11137 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
11138 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
11140 /* Either write out immediately, or delay until we've loaded the last
11141 byte, depending on interleave factor. */
11142 if (interleave_factor
== 1)
11144 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11145 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
11146 emit_insn (gen_unaligned_storehi (mem
,
11147 gen_lowpart (HImode
, halfword_tmp
)));
11148 halfword_tmp
= NULL
;
11156 gcc_assert (remaining
< 2);
11158 /* Copy last byte. */
11160 if ((remaining
& 1) != 0)
11162 byte_tmp
= gen_reg_rtx (SImode
);
11164 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
11165 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
11166 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
11168 if (interleave_factor
== 1)
11170 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11171 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
11172 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
11181 /* Store last halfword if we haven't done so already. */
11185 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11186 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
11187 emit_insn (gen_unaligned_storehi (mem
,
11188 gen_lowpart (HImode
, halfword_tmp
)));
11192 /* Likewise for last byte. */
11196 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11197 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
11198 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
11202 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
11205 /* From mips_adjust_block_mem:
11207 Helper function for doing a loop-based block operation on memory
11208 reference MEM. Each iteration of the loop will operate on LENGTH
11211 Create a new base register for use within the loop and point it to
11212 the start of MEM. Create a new memory reference that uses this
11213 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11216 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
11219 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
11221 /* Although the new mem does not refer to a known location,
11222 it does keep up to LENGTH bytes of alignment. */
11223 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
11224 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
11227 /* From mips_block_move_loop:
11229 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11230 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11231 the memory regions do not overlap. */
11234 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
11235 unsigned int interleave_factor
,
11236 HOST_WIDE_INT bytes_per_iter
)
11238 rtx label
, src_reg
, dest_reg
, final_src
, test
;
11239 HOST_WIDE_INT leftover
;
11241 leftover
= length
% bytes_per_iter
;
11242 length
-= leftover
;
11244 /* Create registers and memory references for use within the loop. */
11245 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
11246 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
11248 /* Calculate the value that SRC_REG should have after the last iteration of
11250 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
11251 0, 0, OPTAB_WIDEN
);
11253 /* Emit the start of the loop. */
11254 label
= gen_label_rtx ();
11255 emit_label (label
);
11257 /* Emit the loop body. */
11258 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
11259 interleave_factor
);
11261 /* Move on to the next block. */
11262 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
11263 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
11265 /* Emit the loop condition. */
11266 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
11267 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
11269 /* Mop up any left-over bytes. */
11271 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
11274 /* Emit a block move when either the source or destination is unaligned (not
11275 aligned to a four-byte boundary). This may need further tuning depending on
11276 core type, optimize_size setting, etc. */
11279 arm_movmemqi_unaligned (rtx
*operands
)
11281 HOST_WIDE_INT length
= INTVAL (operands
[2]);
11285 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
11286 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
11287 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11288 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11289 or dst_aligned though: allow more interleaving in those cases since the
11290 resulting code can be smaller. */
11291 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
11292 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
11295 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
11296 interleave_factor
, bytes_per_iter
);
11298 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
11299 interleave_factor
);
11303 /* Note that the loop created by arm_block_move_unaligned_loop may be
11304 subject to loop unrolling, which makes tuning this condition a little
11307 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
11309 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
11316 arm_gen_movmemqi (rtx
*operands
)
11318 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
11319 HOST_WIDE_INT srcoffset
, dstoffset
;
11321 rtx src
, dst
, srcbase
, dstbase
;
11322 rtx part_bytes_reg
= NULL
;
11325 if (!CONST_INT_P (operands
[2])
11326 || !CONST_INT_P (operands
[3])
11327 || INTVAL (operands
[2]) > 64)
11330 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
11331 return arm_movmemqi_unaligned (operands
);
11333 if (INTVAL (operands
[3]) & 3)
11336 dstbase
= operands
[0];
11337 srcbase
= operands
[1];
11339 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
11340 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
11342 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
11343 out_words_to_go
= INTVAL (operands
[2]) / 4;
11344 last_bytes
= INTVAL (operands
[2]) & 3;
11345 dstoffset
= srcoffset
= 0;
11347 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
11348 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
11350 for (i
= 0; in_words_to_go
>= 2; i
+=4)
11352 if (in_words_to_go
> 4)
11353 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
11354 TRUE
, srcbase
, &srcoffset
));
11356 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
11357 src
, FALSE
, srcbase
,
11360 if (out_words_to_go
)
11362 if (out_words_to_go
> 4)
11363 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
11364 TRUE
, dstbase
, &dstoffset
));
11365 else if (out_words_to_go
!= 1)
11366 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
11367 out_words_to_go
, dst
,
11370 dstbase
, &dstoffset
));
11373 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
11374 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
11375 if (last_bytes
!= 0)
11377 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
11383 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
11384 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
11387 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11388 if (out_words_to_go
)
11392 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
11393 sreg
= copy_to_reg (mem
);
11395 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
11396 emit_move_insn (mem
, sreg
);
11399 gcc_assert (!in_words_to_go
); /* Sanity check */
11402 if (in_words_to_go
)
11404 gcc_assert (in_words_to_go
> 0);
11406 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
11407 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
11410 gcc_assert (!last_bytes
|| part_bytes_reg
);
11412 if (BYTES_BIG_ENDIAN
&& last_bytes
)
11414 rtx tmp
= gen_reg_rtx (SImode
);
11416 /* The bytes we want are in the top end of the word. */
11417 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
11418 GEN_INT (8 * (4 - last_bytes
))));
11419 part_bytes_reg
= tmp
;
11423 mem
= adjust_automodify_address (dstbase
, QImode
,
11424 plus_constant (Pmode
, dst
,
11426 dstoffset
+ last_bytes
- 1);
11427 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
11431 tmp
= gen_reg_rtx (SImode
);
11432 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
11433 part_bytes_reg
= tmp
;
11440 if (last_bytes
> 1)
11442 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
11443 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
11447 rtx tmp
= gen_reg_rtx (SImode
);
11448 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
11449 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
11450 part_bytes_reg
= tmp
;
11457 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
11458 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
11465 /* Select a dominance comparison mode if possible for a test of the general
11466 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11467 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11468 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11469 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11470 In all cases OP will be either EQ or NE, but we don't need to know which
11471 here. If we are unable to support a dominance comparison we return
11472 CC mode. This will then fail to match for the RTL expressions that
11473 generate this call. */
11475 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
11477 enum rtx_code cond1
, cond2
;
11480 /* Currently we will probably get the wrong result if the individual
11481 comparisons are not simple. This also ensures that it is safe to
11482 reverse a comparison if necessary. */
11483 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
11485 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
11489 /* The if_then_else variant of this tests the second condition if the
11490 first passes, but is true if the first fails. Reverse the first
11491 condition to get a true "inclusive-or" expression. */
11492 if (cond_or
== DOM_CC_NX_OR_Y
)
11493 cond1
= reverse_condition (cond1
);
11495 /* If the comparisons are not equal, and one doesn't dominate the other,
11496 then we can't do this. */
11498 && !comparison_dominates_p (cond1
, cond2
)
11499 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
11504 enum rtx_code temp
= cond1
;
11512 if (cond_or
== DOM_CC_X_AND_Y
)
11517 case EQ
: return CC_DEQmode
;
11518 case LE
: return CC_DLEmode
;
11519 case LEU
: return CC_DLEUmode
;
11520 case GE
: return CC_DGEmode
;
11521 case GEU
: return CC_DGEUmode
;
11522 default: gcc_unreachable ();
11526 if (cond_or
== DOM_CC_X_AND_Y
)
11538 gcc_unreachable ();
11542 if (cond_or
== DOM_CC_X_AND_Y
)
11554 gcc_unreachable ();
11558 if (cond_or
== DOM_CC_X_AND_Y
)
11559 return CC_DLTUmode
;
11564 return CC_DLTUmode
;
11566 return CC_DLEUmode
;
11570 gcc_unreachable ();
11574 if (cond_or
== DOM_CC_X_AND_Y
)
11575 return CC_DGTUmode
;
11580 return CC_DGTUmode
;
11582 return CC_DGEUmode
;
11586 gcc_unreachable ();
11589 /* The remaining cases only occur when both comparisons are the
11592 gcc_assert (cond1
== cond2
);
11596 gcc_assert (cond1
== cond2
);
11600 gcc_assert (cond1
== cond2
);
11604 gcc_assert (cond1
== cond2
);
11605 return CC_DLEUmode
;
11608 gcc_assert (cond1
== cond2
);
11609 return CC_DGEUmode
;
11612 gcc_unreachable ();
11617 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
11619 /* All floating point compares return CCFP if it is an equality
11620 comparison, and CCFPE otherwise. */
11621 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
11644 gcc_unreachable ();
11648 /* A compare with a shifted operand. Because of canonicalization, the
11649 comparison will have to be swapped when we emit the assembler. */
11650 if (GET_MODE (y
) == SImode
11651 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
11652 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
11653 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
11654 || GET_CODE (x
) == ROTATERT
))
11657 /* This operation is performed swapped, but since we only rely on the Z
11658 flag we don't need an additional mode. */
11659 if (GET_MODE (y
) == SImode
11660 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
11661 && GET_CODE (x
) == NEG
11662 && (op
== EQ
|| op
== NE
))
11665 /* This is a special case that is used by combine to allow a
11666 comparison of a shifted byte load to be split into a zero-extend
11667 followed by a comparison of the shifted integer (only valid for
11668 equalities and unsigned inequalities). */
11669 if (GET_MODE (x
) == SImode
11670 && GET_CODE (x
) == ASHIFT
11671 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
11672 && GET_CODE (XEXP (x
, 0)) == SUBREG
11673 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
11674 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
11675 && (op
== EQ
|| op
== NE
11676 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
11677 && CONST_INT_P (y
))
11680 /* A construct for a conditional compare, if the false arm contains
11681 0, then both conditions must be true, otherwise either condition
11682 must be true. Not all conditions are possible, so CCmode is
11683 returned if it can't be done. */
11684 if (GET_CODE (x
) == IF_THEN_ELSE
11685 && (XEXP (x
, 2) == const0_rtx
11686 || XEXP (x
, 2) == const1_rtx
)
11687 && COMPARISON_P (XEXP (x
, 0))
11688 && COMPARISON_P (XEXP (x
, 1)))
11689 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
11690 INTVAL (XEXP (x
, 2)));
11692 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11693 if (GET_CODE (x
) == AND
11694 && (op
== EQ
|| op
== NE
)
11695 && COMPARISON_P (XEXP (x
, 0))
11696 && COMPARISON_P (XEXP (x
, 1)))
11697 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
11700 if (GET_CODE (x
) == IOR
11701 && (op
== EQ
|| op
== NE
)
11702 && COMPARISON_P (XEXP (x
, 0))
11703 && COMPARISON_P (XEXP (x
, 1)))
11704 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
11707 /* An operation (on Thumb) where we want to test for a single bit.
11708 This is done by shifting that bit up into the top bit of a
11709 scratch register; we can then branch on the sign bit. */
11711 && GET_MODE (x
) == SImode
11712 && (op
== EQ
|| op
== NE
)
11713 && GET_CODE (x
) == ZERO_EXTRACT
11714 && XEXP (x
, 1) == const1_rtx
)
11717 /* An operation that sets the condition codes as a side-effect, the
11718 V flag is not set correctly, so we can only use comparisons where
11719 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11721 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11722 if (GET_MODE (x
) == SImode
11724 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
11725 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
11726 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
11727 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
11728 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
11729 || GET_CODE (x
) == LSHIFTRT
11730 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
11731 || GET_CODE (x
) == ROTATERT
11732 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
11733 return CC_NOOVmode
;
11735 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
11738 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
11739 && GET_CODE (x
) == PLUS
11740 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
11743 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
11749 /* A DImode comparison against zero can be implemented by
11750 or'ing the two halves together. */
11751 if (y
== const0_rtx
)
11754 /* We can do an equality test in three Thumb instructions. */
11764 /* DImode unsigned comparisons can be implemented by cmp +
11765 cmpeq without a scratch register. Not worth doing in
11776 /* DImode signed and unsigned comparisons can be implemented
11777 by cmp + sbcs with a scratch register, but that does not
11778 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11779 gcc_assert (op
!= EQ
&& op
!= NE
);
11783 gcc_unreachable ();
11787 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
11788 return GET_MODE (x
);
11793 /* X and Y are two things to compare using CODE. Emit the compare insn and
11794 return the rtx for register 0 in the proper mode. FP means this is a
11795 floating point compare: I don't think that it is needed on the arm. */
11797 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
11799 enum machine_mode mode
;
11801 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
11803 /* We might have X as a constant, Y as a register because of the predicates
11804 used for cmpdi. If so, force X to a register here. */
11805 if (dimode_comparison
&& !REG_P (x
))
11806 x
= force_reg (DImode
, x
);
11808 mode
= SELECT_CC_MODE (code
, x
, y
);
11809 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
11811 if (dimode_comparison
11812 && mode
!= CC_CZmode
)
11816 /* To compare two non-zero values for equality, XOR them and
11817 then compare against zero. Not used for ARM mode; there
11818 CC_CZmode is cheaper. */
11819 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
11821 gcc_assert (!reload_completed
);
11822 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
11826 /* A scratch register is required. */
11827 if (reload_completed
)
11828 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
11830 scratch
= gen_rtx_SCRATCH (SImode
);
11832 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11833 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
11834 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
11837 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
11842 /* Generate a sequence of insns that will generate the correct return
11843 address mask depending on the physical architecture that the program
11846 arm_gen_return_addr_mask (void)
11848 rtx reg
= gen_reg_rtx (Pmode
);
11850 emit_insn (gen_return_addr_mask (reg
));
11855 arm_reload_in_hi (rtx
*operands
)
11857 rtx ref
= operands
[1];
11859 HOST_WIDE_INT offset
= 0;
11861 if (GET_CODE (ref
) == SUBREG
)
11863 offset
= SUBREG_BYTE (ref
);
11864 ref
= SUBREG_REG (ref
);
11869 /* We have a pseudo which has been spilt onto the stack; there
11870 are two cases here: the first where there is a simple
11871 stack-slot replacement and a second where the stack-slot is
11872 out of range, or is used as a subreg. */
11873 if (reg_equiv_mem (REGNO (ref
)))
11875 ref
= reg_equiv_mem (REGNO (ref
));
11876 base
= find_replacement (&XEXP (ref
, 0));
11879 /* The slot is out of range, or was dressed up in a SUBREG. */
11880 base
= reg_equiv_address (REGNO (ref
));
11883 base
= find_replacement (&XEXP (ref
, 0));
11885 /* Handle the case where the address is too complex to be offset by 1. */
11886 if (GET_CODE (base
) == MINUS
11887 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
11889 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11891 emit_set_insn (base_plus
, base
);
11894 else if (GET_CODE (base
) == PLUS
)
11896 /* The addend must be CONST_INT, or we would have dealt with it above. */
11897 HOST_WIDE_INT hi
, lo
;
11899 offset
+= INTVAL (XEXP (base
, 1));
11900 base
= XEXP (base
, 0);
11902 /* Rework the address into a legal sequence of insns. */
11903 /* Valid range for lo is -4095 -> 4095 */
11906 : -((-offset
) & 0xfff));
11908 /* Corner case, if lo is the max offset then we would be out of range
11909 once we have added the additional 1 below, so bump the msb into the
11910 pre-loading insn(s). */
11914 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
11915 ^ (HOST_WIDE_INT
) 0x80000000)
11916 - (HOST_WIDE_INT
) 0x80000000);
11918 gcc_assert (hi
+ lo
== offset
);
11922 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11924 /* Get the base address; addsi3 knows how to handle constants
11925 that require more than one insn. */
11926 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
11932 /* Operands[2] may overlap operands[0] (though it won't overlap
11933 operands[1]), that's why we asked for a DImode reg -- so we can
11934 use the bit that does not overlap. */
11935 if (REGNO (operands
[2]) == REGNO (operands
[0]))
11936 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11938 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
11940 emit_insn (gen_zero_extendqisi2 (scratch
,
11941 gen_rtx_MEM (QImode
,
11942 plus_constant (Pmode
, base
,
11944 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11945 gen_rtx_MEM (QImode
,
11946 plus_constant (Pmode
, base
,
11948 if (!BYTES_BIG_ENDIAN
)
11949 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11950 gen_rtx_IOR (SImode
,
11953 gen_rtx_SUBREG (SImode
, operands
[0], 0),
11957 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11958 gen_rtx_IOR (SImode
,
11959 gen_rtx_ASHIFT (SImode
, scratch
,
11961 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
11964 /* Handle storing a half-word to memory during reload by synthesizing as two
11965 byte stores. Take care not to clobber the input values until after we
11966 have moved them somewhere safe. This code assumes that if the DImode
11967 scratch in operands[2] overlaps either the input value or output address
11968 in some way, then that value must die in this insn (we absolutely need
11969 two scratch registers for some corner cases). */
11971 arm_reload_out_hi (rtx
*operands
)
11973 rtx ref
= operands
[0];
11974 rtx outval
= operands
[1];
11976 HOST_WIDE_INT offset
= 0;
11978 if (GET_CODE (ref
) == SUBREG
)
11980 offset
= SUBREG_BYTE (ref
);
11981 ref
= SUBREG_REG (ref
);
11986 /* We have a pseudo which has been spilt onto the stack; there
11987 are two cases here: the first where there is a simple
11988 stack-slot replacement and a second where the stack-slot is
11989 out of range, or is used as a subreg. */
11990 if (reg_equiv_mem (REGNO (ref
)))
11992 ref
= reg_equiv_mem (REGNO (ref
));
11993 base
= find_replacement (&XEXP (ref
, 0));
11996 /* The slot is out of range, or was dressed up in a SUBREG. */
11997 base
= reg_equiv_address (REGNO (ref
));
12000 base
= find_replacement (&XEXP (ref
, 0));
12002 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
12004 /* Handle the case where the address is too complex to be offset by 1. */
12005 if (GET_CODE (base
) == MINUS
12006 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
12008 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
12010 /* Be careful not to destroy OUTVAL. */
12011 if (reg_overlap_mentioned_p (base_plus
, outval
))
12013 /* Updating base_plus might destroy outval, see if we can
12014 swap the scratch and base_plus. */
12015 if (!reg_overlap_mentioned_p (scratch
, outval
))
12018 scratch
= base_plus
;
12023 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
12025 /* Be conservative and copy OUTVAL into the scratch now,
12026 this should only be necessary if outval is a subreg
12027 of something larger than a word. */
12028 /* XXX Might this clobber base? I can't see how it can,
12029 since scratch is known to overlap with OUTVAL, and
12030 must be wider than a word. */
12031 emit_insn (gen_movhi (scratch_hi
, outval
));
12032 outval
= scratch_hi
;
12036 emit_set_insn (base_plus
, base
);
12039 else if (GET_CODE (base
) == PLUS
)
12041 /* The addend must be CONST_INT, or we would have dealt with it above. */
12042 HOST_WIDE_INT hi
, lo
;
12044 offset
+= INTVAL (XEXP (base
, 1));
12045 base
= XEXP (base
, 0);
12047 /* Rework the address into a legal sequence of insns. */
12048 /* Valid range for lo is -4095 -> 4095 */
12051 : -((-offset
) & 0xfff));
12053 /* Corner case, if lo is the max offset then we would be out of range
12054 once we have added the additional 1 below, so bump the msb into the
12055 pre-loading insn(s). */
12059 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
12060 ^ (HOST_WIDE_INT
) 0x80000000)
12061 - (HOST_WIDE_INT
) 0x80000000);
12063 gcc_assert (hi
+ lo
== offset
);
12067 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
12069 /* Be careful not to destroy OUTVAL. */
12070 if (reg_overlap_mentioned_p (base_plus
, outval
))
12072 /* Updating base_plus might destroy outval, see if we
12073 can swap the scratch and base_plus. */
12074 if (!reg_overlap_mentioned_p (scratch
, outval
))
12077 scratch
= base_plus
;
12082 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
12084 /* Be conservative and copy outval into scratch now,
12085 this should only be necessary if outval is a
12086 subreg of something larger than a word. */
12087 /* XXX Might this clobber base? I can't see how it
12088 can, since scratch is known to overlap with
12090 emit_insn (gen_movhi (scratch_hi
, outval
));
12091 outval
= scratch_hi
;
12095 /* Get the base address; addsi3 knows how to handle constants
12096 that require more than one insn. */
12097 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
12103 if (BYTES_BIG_ENDIAN
)
12105 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
12106 plus_constant (Pmode
, base
,
12108 gen_lowpart (QImode
, outval
)));
12109 emit_insn (gen_lshrsi3 (scratch
,
12110 gen_rtx_SUBREG (SImode
, outval
, 0),
12112 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
12114 gen_lowpart (QImode
, scratch
)));
12118 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
12120 gen_lowpart (QImode
, outval
)));
12121 emit_insn (gen_lshrsi3 (scratch
,
12122 gen_rtx_SUBREG (SImode
, outval
, 0),
12124 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
12125 plus_constant (Pmode
, base
,
12127 gen_lowpart (QImode
, scratch
)));
12131 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12132 (padded to the size of a word) should be passed in a register. */
12135 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
12137 if (TARGET_AAPCS_BASED
)
12138 return must_pass_in_stack_var_size (mode
, type
);
12140 return must_pass_in_stack_var_size_or_pad (mode
, type
);
12144 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12145 Return true if an argument passed on the stack should be padded upwards,
12146 i.e. if the least-significant byte has useful data.
12147 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12148 aggregate types are placed in the lowest memory address. */
12151 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
12153 if (!TARGET_AAPCS_BASED
)
12154 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
12156 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
12163 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12164 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12165 register has useful data, and return the opposite if the most
12166 significant byte does. */
12169 arm_pad_reg_upward (enum machine_mode mode
,
12170 tree type
, int first ATTRIBUTE_UNUSED
)
12172 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
12174 /* For AAPCS, small aggregates, small fixed-point types,
12175 and small complex types are always padded upwards. */
12178 if ((AGGREGATE_TYPE_P (type
)
12179 || TREE_CODE (type
) == COMPLEX_TYPE
12180 || FIXED_POINT_TYPE_P (type
))
12181 && int_size_in_bytes (type
) <= 4)
12186 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
12187 && GET_MODE_SIZE (mode
) <= 4)
12192 /* Otherwise, use default padding. */
12193 return !BYTES_BIG_ENDIAN
;
12196 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12197 assuming that the address in the base register is word aligned. */
12199 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
12201 HOST_WIDE_INT max_offset
;
12203 /* Offset must be a multiple of 4 in Thumb mode. */
12204 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
12209 else if (TARGET_ARM
)
12214 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
12217 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12218 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12219 Assumes that the address in the base register RN is word aligned. Pattern
12220 guarantees that both memory accesses use the same base register,
12221 the offsets are constants within the range, and the gap between the offsets is 4.
12222 If preload complete then check that registers are legal. WBACK indicates whether
12223 address is updated. LOAD indicates whether memory access is load or store. */
12225 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
12226 bool wback
, bool load
)
12228 unsigned int t
, t2
, n
;
12230 if (!reload_completed
)
12233 if (!offset_ok_for_ldrd_strd (offset
))
12240 if ((TARGET_THUMB2
)
12241 && ((wback
&& (n
== t
|| n
== t2
))
12242 || (t
== SP_REGNUM
)
12243 || (t
== PC_REGNUM
)
12244 || (t2
== SP_REGNUM
)
12245 || (t2
== PC_REGNUM
)
12246 || (!load
&& (n
== PC_REGNUM
))
12247 || (load
&& (t
== t2
))
12248 /* Triggers Cortex-M3 LDRD errata. */
12249 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
12253 && ((wback
&& (n
== t
|| n
== t2
))
12254 || (t2
== PC_REGNUM
)
12255 || (t
% 2 != 0) /* First destination register is not even. */
12257 /* PC can be used as base register (for offset addressing only),
12258 but it is depricated. */
12259 || (n
== PC_REGNUM
)))
12266 /* Print a symbolic form of X to the debug file, F. */
12268 arm_print_value (FILE *f
, rtx x
)
12270 switch (GET_CODE (x
))
12273 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
12277 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
12285 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
12287 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
12288 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
12296 fprintf (f
, "\"%s\"", XSTR (x
, 0));
12300 fprintf (f
, "`%s'", XSTR (x
, 0));
12304 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
12308 arm_print_value (f
, XEXP (x
, 0));
12312 arm_print_value (f
, XEXP (x
, 0));
12314 arm_print_value (f
, XEXP (x
, 1));
12322 fprintf (f
, "????");
12327 /* Routines for manipulation of the constant pool. */
12329 /* Arm instructions cannot load a large constant directly into a
12330 register; they have to come from a pc relative load. The constant
12331 must therefore be placed in the addressable range of the pc
12332 relative load. Depending on the precise pc relative load
12333 instruction the range is somewhere between 256 bytes and 4k. This
12334 means that we often have to dump a constant inside a function, and
12335 generate code to branch around it.
12337 It is important to minimize this, since the branches will slow
12338 things down and make the code larger.
12340 Normally we can hide the table after an existing unconditional
12341 branch so that there is no interruption of the flow, but in the
12342 worst case the code looks like this:
12360 We fix this by performing a scan after scheduling, which notices
12361 which instructions need to have their operands fetched from the
12362 constant table and builds the table.
12364 The algorithm starts by building a table of all the constants that
12365 need fixing up and all the natural barriers in the function (places
12366 where a constant table can be dropped without breaking the flow).
12367 For each fixup we note how far the pc-relative replacement will be
12368 able to reach and the offset of the instruction into the function.
12370 Having built the table we then group the fixes together to form
12371 tables that are as large as possible (subject to addressing
12372 constraints) and emit each table of constants after the last
12373 barrier that is within range of all the instructions in the group.
12374 If a group does not contain a barrier, then we forcibly create one
12375 by inserting a jump instruction into the flow. Once the table has
12376 been inserted, the insns are then modified to reference the
12377 relevant entry in the pool.
12379 Possible enhancements to the algorithm (not implemented) are:
12381 1) For some processors and object formats, there may be benefit in
12382 aligning the pools to the start of cache lines; this alignment
12383 would need to be taken into account when calculating addressability
12386 /* These typedefs are located at the start of this file, so that
12387 they can be used in the prototypes there. This comment is to
12388 remind readers of that fact so that the following structures
12389 can be understood more easily.
12391 typedef struct minipool_node Mnode;
12392 typedef struct minipool_fixup Mfix; */
12394 struct minipool_node
12396 /* Doubly linked chain of entries. */
12399 /* The maximum offset into the code that this entry can be placed. While
12400 pushing fixes for forward references, all entries are sorted in order
12401 of increasing max_address. */
12402 HOST_WIDE_INT max_address
;
12403 /* Similarly for an entry inserted for a backwards ref. */
12404 HOST_WIDE_INT min_address
;
12405 /* The number of fixes referencing this entry. This can become zero
12406 if we "unpush" an entry. In this case we ignore the entry when we
12407 come to emit the code. */
12409 /* The offset from the start of the minipool. */
12410 HOST_WIDE_INT offset
;
12411 /* The value in table. */
12413 /* The mode of value. */
12414 enum machine_mode mode
;
12415 /* The size of the value. With iWMMXt enabled
12416 sizes > 4 also imply an alignment of 8-bytes. */
12420 struct minipool_fixup
12424 HOST_WIDE_INT address
;
12426 enum machine_mode mode
;
12430 HOST_WIDE_INT forwards
;
12431 HOST_WIDE_INT backwards
;
12434 /* Fixes less than a word need padding out to a word boundary. */
12435 #define MINIPOOL_FIX_SIZE(mode) \
12436 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
12438 static Mnode
* minipool_vector_head
;
12439 static Mnode
* minipool_vector_tail
;
12440 static rtx minipool_vector_label
;
12441 static int minipool_pad
;
12443 /* The linked list of all minipool fixes required for this function. */
12444 Mfix
* minipool_fix_head
;
12445 Mfix
* minipool_fix_tail
;
12446 /* The fix entry for the current minipool, once it has been placed. */
12447 Mfix
* minipool_barrier
;
12449 /* Determines if INSN is the start of a jump table. Returns the end
12450 of the TABLE or NULL_RTX. */
12452 is_jump_table (rtx insn
)
12456 if (jump_to_label_p (insn
)
12457 && ((table
= next_real_insn (JUMP_LABEL (insn
)))
12458 == next_real_insn (insn
))
12461 && (GET_CODE (PATTERN (table
)) == ADDR_VEC
12462 || GET_CODE (PATTERN (table
)) == ADDR_DIFF_VEC
))
12468 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12469 #define JUMP_TABLES_IN_TEXT_SECTION 0
12472 static HOST_WIDE_INT
12473 get_jump_table_size (rtx insn
)
12475 /* ADDR_VECs only take room if read-only data does into the text
12477 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
12479 rtx body
= PATTERN (insn
);
12480 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
12481 HOST_WIDE_INT size
;
12482 HOST_WIDE_INT modesize
;
12484 modesize
= GET_MODE_SIZE (GET_MODE (body
));
12485 size
= modesize
* XVECLEN (body
, elt
);
12489 /* Round up size of TBB table to a halfword boundary. */
12490 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
12493 /* No padding necessary for TBH. */
12496 /* Add two bytes for alignment on Thumb. */
12501 gcc_unreachable ();
12509 /* Return the maximum amount of padding that will be inserted before
12512 static HOST_WIDE_INT
12513 get_label_padding (rtx label
)
12515 HOST_WIDE_INT align
, min_insn_size
;
12517 align
= 1 << label_to_alignment (label
);
12518 min_insn_size
= TARGET_THUMB
? 2 : 4;
12519 return align
> min_insn_size
? align
- min_insn_size
: 0;
12522 /* Move a minipool fix MP from its current location to before MAX_MP.
12523 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12524 constraints may need updating. */
12526 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
12527 HOST_WIDE_INT max_address
)
12529 /* The code below assumes these are different. */
12530 gcc_assert (mp
!= max_mp
);
12532 if (max_mp
== NULL
)
12534 if (max_address
< mp
->max_address
)
12535 mp
->max_address
= max_address
;
12539 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
12540 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
12542 mp
->max_address
= max_address
;
12544 /* Unlink MP from its current position. Since max_mp is non-null,
12545 mp->prev must be non-null. */
12546 mp
->prev
->next
= mp
->next
;
12547 if (mp
->next
!= NULL
)
12548 mp
->next
->prev
= mp
->prev
;
12550 minipool_vector_tail
= mp
->prev
;
12552 /* Re-insert it before MAX_MP. */
12554 mp
->prev
= max_mp
->prev
;
12557 if (mp
->prev
!= NULL
)
12558 mp
->prev
->next
= mp
;
12560 minipool_vector_head
= mp
;
12563 /* Save the new entry. */
12566 /* Scan over the preceding entries and adjust their addresses as
12568 while (mp
->prev
!= NULL
12569 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
12571 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
12578 /* Add a constant to the minipool for a forward reference. Returns the
12579 node added or NULL if the constant will not fit in this pool. */
12581 add_minipool_forward_ref (Mfix
*fix
)
12583 /* If set, max_mp is the first pool_entry that has a lower
12584 constraint than the one we are trying to add. */
12585 Mnode
* max_mp
= NULL
;
12586 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
12589 /* If the minipool starts before the end of FIX->INSN then this FIX
12590 can not be placed into the current pool. Furthermore, adding the
12591 new constant pool entry may cause the pool to start FIX_SIZE bytes
12593 if (minipool_vector_head
&&
12594 (fix
->address
+ get_attr_length (fix
->insn
)
12595 >= minipool_vector_head
->max_address
- fix
->fix_size
))
12598 /* Scan the pool to see if a constant with the same value has
12599 already been added. While we are doing this, also note the
12600 location where we must insert the constant if it doesn't already
12602 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12604 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
12605 && fix
->mode
== mp
->mode
12606 && (!LABEL_P (fix
->value
)
12607 || (CODE_LABEL_NUMBER (fix
->value
)
12608 == CODE_LABEL_NUMBER (mp
->value
)))
12609 && rtx_equal_p (fix
->value
, mp
->value
))
12611 /* More than one fix references this entry. */
12613 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
12616 /* Note the insertion point if necessary. */
12618 && mp
->max_address
> max_address
)
12621 /* If we are inserting an 8-bytes aligned quantity and
12622 we have not already found an insertion point, then
12623 make sure that all such 8-byte aligned quantities are
12624 placed at the start of the pool. */
12625 if (ARM_DOUBLEWORD_ALIGN
12627 && fix
->fix_size
>= 8
12628 && mp
->fix_size
< 8)
12631 max_address
= mp
->max_address
;
12635 /* The value is not currently in the minipool, so we need to create
12636 a new entry for it. If MAX_MP is NULL, the entry will be put on
12637 the end of the list since the placement is less constrained than
12638 any existing entry. Otherwise, we insert the new fix before
12639 MAX_MP and, if necessary, adjust the constraints on the other
12642 mp
->fix_size
= fix
->fix_size
;
12643 mp
->mode
= fix
->mode
;
12644 mp
->value
= fix
->value
;
12646 /* Not yet required for a backwards ref. */
12647 mp
->min_address
= -65536;
12649 if (max_mp
== NULL
)
12651 mp
->max_address
= max_address
;
12653 mp
->prev
= minipool_vector_tail
;
12655 if (mp
->prev
== NULL
)
12657 minipool_vector_head
= mp
;
12658 minipool_vector_label
= gen_label_rtx ();
12661 mp
->prev
->next
= mp
;
12663 minipool_vector_tail
= mp
;
12667 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
12668 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
12670 mp
->max_address
= max_address
;
12673 mp
->prev
= max_mp
->prev
;
12675 if (mp
->prev
!= NULL
)
12676 mp
->prev
->next
= mp
;
12678 minipool_vector_head
= mp
;
12681 /* Save the new entry. */
12684 /* Scan over the preceding entries and adjust their addresses as
12686 while (mp
->prev
!= NULL
12687 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
12689 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
12697 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
12698 HOST_WIDE_INT min_address
)
12700 HOST_WIDE_INT offset
;
12702 /* The code below assumes these are different. */
12703 gcc_assert (mp
!= min_mp
);
12705 if (min_mp
== NULL
)
12707 if (min_address
> mp
->min_address
)
12708 mp
->min_address
= min_address
;
12712 /* We will adjust this below if it is too loose. */
12713 mp
->min_address
= min_address
;
12715 /* Unlink MP from its current position. Since min_mp is non-null,
12716 mp->next must be non-null. */
12717 mp
->next
->prev
= mp
->prev
;
12718 if (mp
->prev
!= NULL
)
12719 mp
->prev
->next
= mp
->next
;
12721 minipool_vector_head
= mp
->next
;
12723 /* Reinsert it after MIN_MP. */
12725 mp
->next
= min_mp
->next
;
12727 if (mp
->next
!= NULL
)
12728 mp
->next
->prev
= mp
;
12730 minipool_vector_tail
= mp
;
12736 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12738 mp
->offset
= offset
;
12739 if (mp
->refcount
> 0)
12740 offset
+= mp
->fix_size
;
12742 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
12743 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
12749 /* Add a constant to the minipool for a backward reference. Returns the
12750 node added or NULL if the constant will not fit in this pool.
12752 Note that the code for insertion for a backwards reference can be
12753 somewhat confusing because the calculated offsets for each fix do
12754 not take into account the size of the pool (which is still under
12757 add_minipool_backward_ref (Mfix
*fix
)
12759 /* If set, min_mp is the last pool_entry that has a lower constraint
12760 than the one we are trying to add. */
12761 Mnode
*min_mp
= NULL
;
12762 /* This can be negative, since it is only a constraint. */
12763 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
12766 /* If we can't reach the current pool from this insn, or if we can't
12767 insert this entry at the end of the pool without pushing other
12768 fixes out of range, then we don't try. This ensures that we
12769 can't fail later on. */
12770 if (min_address
>= minipool_barrier
->address
12771 || (minipool_vector_tail
->min_address
+ fix
->fix_size
12772 >= minipool_barrier
->address
))
12775 /* Scan the pool to see if a constant with the same value has
12776 already been added. While we are doing this, also note the
12777 location where we must insert the constant if it doesn't already
12779 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
12781 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
12782 && fix
->mode
== mp
->mode
12783 && (!LABEL_P (fix
->value
)
12784 || (CODE_LABEL_NUMBER (fix
->value
)
12785 == CODE_LABEL_NUMBER (mp
->value
)))
12786 && rtx_equal_p (fix
->value
, mp
->value
)
12787 /* Check that there is enough slack to move this entry to the
12788 end of the table (this is conservative). */
12789 && (mp
->max_address
12790 > (minipool_barrier
->address
12791 + minipool_vector_tail
->offset
12792 + minipool_vector_tail
->fix_size
)))
12795 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
12798 if (min_mp
!= NULL
)
12799 mp
->min_address
+= fix
->fix_size
;
12802 /* Note the insertion point if necessary. */
12803 if (mp
->min_address
< min_address
)
12805 /* For now, we do not allow the insertion of 8-byte alignment
12806 requiring nodes anywhere but at the start of the pool. */
12807 if (ARM_DOUBLEWORD_ALIGN
12808 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
12813 else if (mp
->max_address
12814 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
12816 /* Inserting before this entry would push the fix beyond
12817 its maximum address (which can happen if we have
12818 re-located a forwards fix); force the new fix to come
12820 if (ARM_DOUBLEWORD_ALIGN
12821 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
12826 min_address
= mp
->min_address
+ fix
->fix_size
;
12829 /* Do not insert a non-8-byte aligned quantity before 8-byte
12830 aligned quantities. */
12831 else if (ARM_DOUBLEWORD_ALIGN
12832 && fix
->fix_size
< 8
12833 && mp
->fix_size
>= 8)
12836 min_address
= mp
->min_address
+ fix
->fix_size
;
12841 /* We need to create a new entry. */
12843 mp
->fix_size
= fix
->fix_size
;
12844 mp
->mode
= fix
->mode
;
12845 mp
->value
= fix
->value
;
12847 mp
->max_address
= minipool_barrier
->address
+ 65536;
12849 mp
->min_address
= min_address
;
12851 if (min_mp
== NULL
)
12854 mp
->next
= minipool_vector_head
;
12856 if (mp
->next
== NULL
)
12858 minipool_vector_tail
= mp
;
12859 minipool_vector_label
= gen_label_rtx ();
12862 mp
->next
->prev
= mp
;
12864 minipool_vector_head
= mp
;
12868 mp
->next
= min_mp
->next
;
12872 if (mp
->next
!= NULL
)
12873 mp
->next
->prev
= mp
;
12875 minipool_vector_tail
= mp
;
12878 /* Save the new entry. */
12886 /* Scan over the following entries and adjust their offsets. */
12887 while (mp
->next
!= NULL
)
12889 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
12890 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
12893 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
12895 mp
->next
->offset
= mp
->offset
;
12904 assign_minipool_offsets (Mfix
*barrier
)
12906 HOST_WIDE_INT offset
= 0;
12909 minipool_barrier
= barrier
;
12911 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12913 mp
->offset
= offset
;
12915 if (mp
->refcount
> 0)
12916 offset
+= mp
->fix_size
;
12920 /* Output the literal table */
12922 dump_minipool (rtx scan
)
12928 if (ARM_DOUBLEWORD_ALIGN
)
12929 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12930 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
12937 fprintf (dump_file
,
12938 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12939 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
12941 scan
= emit_label_after (gen_label_rtx (), scan
);
12942 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
12943 scan
= emit_label_after (minipool_vector_label
, scan
);
12945 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
12947 if (mp
->refcount
> 0)
12951 fprintf (dump_file
,
12952 ";; Offset %u, min %ld, max %ld ",
12953 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
12954 (unsigned long) mp
->max_address
);
12955 arm_print_value (dump_file
, mp
->value
);
12956 fputc ('\n', dump_file
);
12959 switch (mp
->fix_size
)
12961 #ifdef HAVE_consttable_1
12963 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
12967 #ifdef HAVE_consttable_2
12969 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
12973 #ifdef HAVE_consttable_4
12975 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
12979 #ifdef HAVE_consttable_8
12981 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
12985 #ifdef HAVE_consttable_16
12987 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
12992 gcc_unreachable ();
13000 minipool_vector_head
= minipool_vector_tail
= NULL
;
13001 scan
= emit_insn_after (gen_consttable_end (), scan
);
13002 scan
= emit_barrier_after (scan
);
13005 /* Return the cost of forcibly inserting a barrier after INSN. */
13007 arm_barrier_cost (rtx insn
)
13009 /* Basing the location of the pool on the loop depth is preferable,
13010 but at the moment, the basic block information seems to be
13011 corrupt by this stage of the compilation. */
13012 int base_cost
= 50;
13013 rtx next
= next_nonnote_insn (insn
);
13015 if (next
!= NULL
&& LABEL_P (next
))
13018 switch (GET_CODE (insn
))
13021 /* It will always be better to place the table before the label, rather
13030 return base_cost
- 10;
13033 return base_cost
+ 10;
13037 /* Find the best place in the insn stream in the range
13038 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13039 Create the barrier by inserting a jump and add a new fix entry for
13042 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
13044 HOST_WIDE_INT count
= 0;
13046 rtx from
= fix
->insn
;
13047 /* The instruction after which we will insert the jump. */
13048 rtx selected
= NULL
;
13050 /* The address at which the jump instruction will be placed. */
13051 HOST_WIDE_INT selected_address
;
13053 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
13054 rtx label
= gen_label_rtx ();
13056 selected_cost
= arm_barrier_cost (from
);
13057 selected_address
= fix
->address
;
13059 while (from
&& count
< max_count
)
13064 /* This code shouldn't have been called if there was a natural barrier
13066 gcc_assert (!BARRIER_P (from
));
13068 /* Count the length of this insn. This must stay in sync with the
13069 code that pushes minipool fixes. */
13070 if (LABEL_P (from
))
13071 count
+= get_label_padding (from
);
13073 count
+= get_attr_length (from
);
13075 /* If there is a jump table, add its length. */
13076 tmp
= is_jump_table (from
);
13079 count
+= get_jump_table_size (tmp
);
13081 /* Jump tables aren't in a basic block, so base the cost on
13082 the dispatch insn. If we select this location, we will
13083 still put the pool after the table. */
13084 new_cost
= arm_barrier_cost (from
);
13086 if (count
< max_count
13087 && (!selected
|| new_cost
<= selected_cost
))
13090 selected_cost
= new_cost
;
13091 selected_address
= fix
->address
+ count
;
13094 /* Continue after the dispatch table. */
13095 from
= NEXT_INSN (tmp
);
13099 new_cost
= arm_barrier_cost (from
);
13101 if (count
< max_count
13102 && (!selected
|| new_cost
<= selected_cost
))
13105 selected_cost
= new_cost
;
13106 selected_address
= fix
->address
+ count
;
13109 from
= NEXT_INSN (from
);
13112 /* Make sure that we found a place to insert the jump. */
13113 gcc_assert (selected
);
13115 /* Make sure we do not split a call and its corresponding
13116 CALL_ARG_LOCATION note. */
13117 if (CALL_P (selected
))
13119 rtx next
= NEXT_INSN (selected
);
13120 if (next
&& NOTE_P (next
)
13121 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
13125 /* Create a new JUMP_INSN that branches around a barrier. */
13126 from
= emit_jump_insn_after (gen_jump (label
), selected
);
13127 JUMP_LABEL (from
) = label
;
13128 barrier
= emit_barrier_after (from
);
13129 emit_label_after (label
, barrier
);
13131 /* Create a minipool barrier entry for the new barrier. */
13132 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
13133 new_fix
->insn
= barrier
;
13134 new_fix
->address
= selected_address
;
13135 new_fix
->next
= fix
->next
;
13136 fix
->next
= new_fix
;
13141 /* Record that there is a natural barrier in the insn stream at
13144 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
13146 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
13149 fix
->address
= address
;
13152 if (minipool_fix_head
!= NULL
)
13153 minipool_fix_tail
->next
= fix
;
13155 minipool_fix_head
= fix
;
13157 minipool_fix_tail
= fix
;
13160 /* Record INSN, which will need fixing up to load a value from the
13161 minipool. ADDRESS is the offset of the insn since the start of the
13162 function; LOC is a pointer to the part of the insn which requires
13163 fixing; VALUE is the constant that must be loaded, which is of type
13166 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
13167 enum machine_mode mode
, rtx value
)
13169 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
13172 fix
->address
= address
;
13175 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
13176 fix
->value
= value
;
13177 fix
->forwards
= get_attr_pool_range (insn
);
13178 fix
->backwards
= get_attr_neg_pool_range (insn
);
13179 fix
->minipool
= NULL
;
13181 /* If an insn doesn't have a range defined for it, then it isn't
13182 expecting to be reworked by this code. Better to stop now than
13183 to generate duff assembly code. */
13184 gcc_assert (fix
->forwards
|| fix
->backwards
);
13186 /* If an entry requires 8-byte alignment then assume all constant pools
13187 require 4 bytes of padding. Trying to do this later on a per-pool
13188 basis is awkward because existing pool entries have to be modified. */
13189 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
13194 fprintf (dump_file
,
13195 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13196 GET_MODE_NAME (mode
),
13197 INSN_UID (insn
), (unsigned long) address
,
13198 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
13199 arm_print_value (dump_file
, fix
->value
);
13200 fprintf (dump_file
, "\n");
13203 /* Add it to the chain of fixes. */
13206 if (minipool_fix_head
!= NULL
)
13207 minipool_fix_tail
->next
= fix
;
13209 minipool_fix_head
= fix
;
13211 minipool_fix_tail
= fix
;
13214 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13215 Returns the number of insns needed, or 99 if we don't know how to
13218 arm_const_double_inline_cost (rtx val
)
13220 rtx lowpart
, highpart
;
13221 enum machine_mode mode
;
13223 mode
= GET_MODE (val
);
13225 if (mode
== VOIDmode
)
13228 gcc_assert (GET_MODE_SIZE (mode
) == 8);
13230 lowpart
= gen_lowpart (SImode
, val
);
13231 highpart
= gen_highpart_mode (SImode
, mode
, val
);
13233 gcc_assert (CONST_INT_P (lowpart
));
13234 gcc_assert (CONST_INT_P (highpart
));
13236 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
13237 NULL_RTX
, NULL_RTX
, 0, 0)
13238 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
13239 NULL_RTX
, NULL_RTX
, 0, 0));
13242 /* Return true if it is worthwhile to split a 64-bit constant into two
13243 32-bit operations. This is the case if optimizing for size, or
13244 if we have load delay slots, or if one 32-bit part can be done with
13245 a single data operation. */
13247 arm_const_double_by_parts (rtx val
)
13249 enum machine_mode mode
= GET_MODE (val
);
13252 if (optimize_size
|| arm_ld_sched
)
13255 if (mode
== VOIDmode
)
13258 part
= gen_highpart_mode (SImode
, mode
, val
);
13260 gcc_assert (CONST_INT_P (part
));
13262 if (const_ok_for_arm (INTVAL (part
))
13263 || const_ok_for_arm (~INTVAL (part
)))
13266 part
= gen_lowpart (SImode
, val
);
13268 gcc_assert (CONST_INT_P (part
));
13270 if (const_ok_for_arm (INTVAL (part
))
13271 || const_ok_for_arm (~INTVAL (part
)))
13277 /* Return true if it is possible to inline both the high and low parts
13278 of a 64-bit constant into 32-bit data processing instructions. */
13280 arm_const_double_by_immediates (rtx val
)
13282 enum machine_mode mode
= GET_MODE (val
);
13285 if (mode
== VOIDmode
)
13288 part
= gen_highpart_mode (SImode
, mode
, val
);
13290 gcc_assert (CONST_INT_P (part
));
13292 if (!const_ok_for_arm (INTVAL (part
)))
13295 part
= gen_lowpart (SImode
, val
);
13297 gcc_assert (CONST_INT_P (part
));
13299 if (!const_ok_for_arm (INTVAL (part
)))
13305 /* Scan INSN and note any of its operands that need fixing.
13306 If DO_PUSHES is false we do not actually push any of the fixups
13309 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
13313 extract_insn (insn
);
13315 if (!constrain_operands (1))
13316 fatal_insn_not_found (insn
);
13318 if (recog_data
.n_alternatives
== 0)
13321 /* Fill in recog_op_alt with information about the constraints of
13323 preprocess_constraints ();
13325 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
13327 /* Things we need to fix can only occur in inputs. */
13328 if (recog_data
.operand_type
[opno
] != OP_IN
)
13331 /* If this alternative is a memory reference, then any mention
13332 of constants in this alternative is really to fool reload
13333 into allowing us to accept one there. We need to fix them up
13334 now so that we output the right code. */
13335 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
13337 rtx op
= recog_data
.operand
[opno
];
13339 if (CONSTANT_P (op
))
13342 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
13343 recog_data
.operand_mode
[opno
], op
);
13345 else if (MEM_P (op
)
13346 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
13347 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
13351 rtx cop
= avoid_constant_pool_reference (op
);
13353 /* Casting the address of something to a mode narrower
13354 than a word can cause avoid_constant_pool_reference()
13355 to return the pool reference itself. That's no good to
13356 us here. Lets just hope that we can use the
13357 constant pool value directly. */
13359 cop
= get_pool_constant (XEXP (op
, 0));
13361 push_minipool_fix (insn
, address
,
13362 recog_data
.operand_loc
[opno
],
13363 recog_data
.operand_mode
[opno
], cop
);
13373 /* Convert instructions to their cc-clobbering variant if possible, since
13374 that allows us to use smaller encodings. */
13377 thumb2_reorg (void)
13382 INIT_REG_SET (&live
);
13384 /* We are freeing block_for_insn in the toplev to keep compatibility
13385 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13386 compute_bb_for_insn ();
13393 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
13394 df_simulate_initialize_backwards (bb
, &live
);
13395 FOR_BB_INSNS_REVERSE (bb
, insn
)
13397 if (NONJUMP_INSN_P (insn
)
13398 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
13399 && GET_CODE (PATTERN (insn
)) == SET
)
13401 enum {SKIP
, CONV
, SWAP_CONV
} action
= SKIP
;
13402 rtx pat
= PATTERN (insn
);
13403 rtx dst
= XEXP (pat
, 0);
13404 rtx src
= XEXP (pat
, 1);
13405 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
13407 if (!OBJECT_P (src
))
13408 op0
= XEXP (src
, 0);
13410 if (BINARY_P (src
))
13411 op1
= XEXP (src
, 1);
13413 if (low_register_operand (dst
, SImode
))
13415 switch (GET_CODE (src
))
13418 /* Adding two registers and storing the result
13419 in the first source is already a 16-bit
13421 if (rtx_equal_p (dst
, op0
)
13422 && register_operand (op1
, SImode
))
13425 if (low_register_operand (op0
, SImode
))
13427 /* ADDS <Rd>,<Rn>,<Rm> */
13428 if (low_register_operand (op1
, SImode
))
13430 /* ADDS <Rdn>,#<imm8> */
13431 /* SUBS <Rdn>,#<imm8> */
13432 else if (rtx_equal_p (dst
, op0
)
13433 && CONST_INT_P (op1
)
13434 && IN_RANGE (INTVAL (op1
), -255, 255))
13436 /* ADDS <Rd>,<Rn>,#<imm3> */
13437 /* SUBS <Rd>,<Rn>,#<imm3> */
13438 else if (CONST_INT_P (op1
)
13439 && IN_RANGE (INTVAL (op1
), -7, 7))
13445 /* RSBS <Rd>,<Rn>,#0
13446 Not handled here: see NEG below. */
13447 /* SUBS <Rd>,<Rn>,#<imm3>
13449 Not handled here: see PLUS above. */
13450 /* SUBS <Rd>,<Rn>,<Rm> */
13451 if (low_register_operand (op0
, SImode
)
13452 && low_register_operand (op1
, SImode
))
13457 /* MULS <Rdm>,<Rn>,<Rdm>
13458 As an exception to the rule, this is only used
13459 when optimizing for size since MULS is slow on all
13460 known implementations. We do not even want to use
13461 MULS in cold code, if optimizing for speed, so we
13462 test the global flag here. */
13463 if (!optimize_size
)
13465 /* else fall through. */
13469 /* ANDS <Rdn>,<Rm> */
13470 if (rtx_equal_p (dst
, op0
)
13471 && low_register_operand (op1
, SImode
))
13473 else if (rtx_equal_p (dst
, op1
)
13474 && low_register_operand (op0
, SImode
))
13475 action
= SWAP_CONV
;
13481 /* ASRS <Rdn>,<Rm> */
13482 /* LSRS <Rdn>,<Rm> */
13483 /* LSLS <Rdn>,<Rm> */
13484 if (rtx_equal_p (dst
, op0
)
13485 && low_register_operand (op1
, SImode
))
13487 /* ASRS <Rd>,<Rm>,#<imm5> */
13488 /* LSRS <Rd>,<Rm>,#<imm5> */
13489 /* LSLS <Rd>,<Rm>,#<imm5> */
13490 else if (low_register_operand (op0
, SImode
)
13491 && CONST_INT_P (op1
)
13492 && IN_RANGE (INTVAL (op1
), 0, 31))
13497 /* RORS <Rdn>,<Rm> */
13498 if (rtx_equal_p (dst
, op0
)
13499 && low_register_operand (op1
, SImode
))
13505 /* MVNS <Rd>,<Rm> */
13506 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13507 if (low_register_operand (op0
, SImode
))
13512 /* MOVS <Rd>,#<imm8> */
13513 if (CONST_INT_P (src
)
13514 && IN_RANGE (INTVAL (src
), 0, 255))
13519 /* MOVS and MOV<c> with registers have different
13520 encodings, so are not relevant here. */
13528 if (action
!= SKIP
)
13530 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
13531 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
13534 if (action
== SWAP_CONV
)
13536 src
= copy_rtx (src
);
13537 XEXP (src
, 0) = op1
;
13538 XEXP (src
, 1) = op0
;
13539 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
13540 vec
= gen_rtvec (2, pat
, clobber
);
13542 else /* action == CONV */
13543 vec
= gen_rtvec (2, pat
, clobber
);
13545 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
13546 INSN_CODE (insn
) = -1;
13550 if (NONDEBUG_INSN_P (insn
))
13551 df_simulate_one_insn_backwards (bb
, insn
, &live
);
13555 CLEAR_REG_SET (&live
);
13558 /* Gcc puts the pool in the wrong place for ARM, since we can only
13559 load addresses a limited distance around the pc. We do some
13560 special munging to move the constant pool values to the correct
13561 point in the code. */
13566 HOST_WIDE_INT address
= 0;
13572 /* Ensure all insns that must be split have been split at this point.
13573 Otherwise, the pool placement code below may compute incorrect
13574 insn lengths. Note that when optimizing, all insns have already
13575 been split at this point. */
13577 split_all_insns_noflow ();
13579 minipool_fix_head
= minipool_fix_tail
= NULL
;
13581 /* The first insn must always be a note, or the code below won't
13582 scan it properly. */
13583 insn
= get_insns ();
13584 gcc_assert (NOTE_P (insn
));
13587 /* Scan all the insns and record the operands that will need fixing. */
13588 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
13590 if (BARRIER_P (insn
))
13591 push_minipool_barrier (insn
, address
);
13592 else if (INSN_P (insn
))
13596 note_invalid_constants (insn
, address
, true);
13597 address
+= get_attr_length (insn
);
13599 /* If the insn is a vector jump, add the size of the table
13600 and skip the table. */
13601 if ((table
= is_jump_table (insn
)) != NULL
)
13603 address
+= get_jump_table_size (table
);
13607 else if (LABEL_P (insn
))
13608 /* Add the worst-case padding due to alignment. We don't add
13609 the _current_ padding because the minipool insertions
13610 themselves might change it. */
13611 address
+= get_label_padding (insn
);
13614 fix
= minipool_fix_head
;
13616 /* Now scan the fixups and perform the required changes. */
13621 Mfix
* last_added_fix
;
13622 Mfix
* last_barrier
= NULL
;
13625 /* Skip any further barriers before the next fix. */
13626 while (fix
&& BARRIER_P (fix
->insn
))
13629 /* No more fixes. */
13633 last_added_fix
= NULL
;
13635 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
13637 if (BARRIER_P (ftmp
->insn
))
13639 if (ftmp
->address
>= minipool_vector_head
->max_address
)
13642 last_barrier
= ftmp
;
13644 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
13647 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
13650 /* If we found a barrier, drop back to that; any fixes that we
13651 could have reached but come after the barrier will now go in
13652 the next mini-pool. */
13653 if (last_barrier
!= NULL
)
13655 /* Reduce the refcount for those fixes that won't go into this
13657 for (fdel
= last_barrier
->next
;
13658 fdel
&& fdel
!= ftmp
;
13661 fdel
->minipool
->refcount
--;
13662 fdel
->minipool
= NULL
;
13665 ftmp
= last_barrier
;
13669 /* ftmp is first fix that we can't fit into this pool and
13670 there no natural barriers that we could use. Insert a
13671 new barrier in the code somewhere between the previous
13672 fix and this one, and arrange to jump around it. */
13673 HOST_WIDE_INT max_address
;
13675 /* The last item on the list of fixes must be a barrier, so
13676 we can never run off the end of the list of fixes without
13677 last_barrier being set. */
13680 max_address
= minipool_vector_head
->max_address
;
13681 /* Check that there isn't another fix that is in range that
13682 we couldn't fit into this pool because the pool was
13683 already too large: we need to put the pool before such an
13684 instruction. The pool itself may come just after the
13685 fix because create_fix_barrier also allows space for a
13686 jump instruction. */
13687 if (ftmp
->address
< max_address
)
13688 max_address
= ftmp
->address
+ 1;
13690 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
13693 assign_minipool_offsets (last_barrier
);
13697 if (!BARRIER_P (ftmp
->insn
)
13698 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
13705 /* Scan over the fixes we have identified for this pool, fixing them
13706 up and adding the constants to the pool itself. */
13707 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
13708 this_fix
= this_fix
->next
)
13709 if (!BARRIER_P (this_fix
->insn
))
13712 = plus_constant (Pmode
,
13713 gen_rtx_LABEL_REF (VOIDmode
,
13714 minipool_vector_label
),
13715 this_fix
->minipool
->offset
);
13716 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
13719 dump_minipool (last_barrier
->insn
);
13723 /* From now on we must synthesize any constants that we can't handle
13724 directly. This can happen if the RTL gets split during final
13725 instruction generation. */
13726 after_arm_reorg
= 1;
13728 /* Free the minipool memory. */
13729 obstack_free (&minipool_obstack
, minipool_startobj
);
13732 /* Routines to output assembly language. */
13734 /* If the rtx is the correct value then return the string of the number.
13735 In this way we can ensure that valid double constants are generated even
13736 when cross compiling. */
13738 fp_immediate_constant (rtx x
)
13742 if (!fp_consts_inited
)
13745 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
13747 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
13751 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13752 static const char *
13753 fp_const_from_val (REAL_VALUE_TYPE
*r
)
13755 if (!fp_consts_inited
)
13758 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
13762 /* OPERANDS[0] is the entire list of insns that constitute pop,
13763 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13764 is in the list, UPDATE is true iff the list contains explicit
13765 update of base register. */
13767 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
13773 const char *conditional
;
13774 int num_saves
= XVECLEN (operands
[0], 0);
13775 unsigned int regno
;
13776 unsigned int regno_base
= REGNO (operands
[1]);
13779 offset
+= update
? 1 : 0;
13780 offset
+= return_pc
? 1 : 0;
13782 /* Is the base register in the list? */
13783 for (i
= offset
; i
< num_saves
; i
++)
13785 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
13786 /* If SP is in the list, then the base register must be SP. */
13787 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
13788 /* If base register is in the list, there must be no explicit update. */
13789 if (regno
== regno_base
)
13790 gcc_assert (!update
);
13793 conditional
= reverse
? "%?%D0" : "%?%d0";
13794 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
13796 /* Output pop (not stmfd) because it has a shorter encoding. */
13797 gcc_assert (update
);
13798 sprintf (pattern
, "pop%s\t{", conditional
);
13802 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13803 It's just a convention, their semantics are identical. */
13804 if (regno_base
== SP_REGNUM
)
13805 sprintf (pattern
, "ldm%sfd\t", conditional
);
13806 else if (TARGET_UNIFIED_ASM
)
13807 sprintf (pattern
, "ldmia%s\t", conditional
);
13809 sprintf (pattern
, "ldm%sia\t", conditional
);
13811 strcat (pattern
, reg_names
[regno_base
]);
13813 strcat (pattern
, "!, {");
13815 strcat (pattern
, ", {");
13818 /* Output the first destination register. */
13820 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
13822 /* Output the rest of the destination registers. */
13823 for (i
= offset
+ 1; i
< num_saves
; i
++)
13825 strcat (pattern
, ", ");
13827 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
13830 strcat (pattern
, "}");
13832 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
13833 strcat (pattern
, "^");
13835 output_asm_insn (pattern
, &cond
);
13839 /* Output the assembly for a store multiple. */
13842 vfp_output_fstmd (rtx
* operands
)
13849 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
13850 p
= strlen (pattern
);
13852 gcc_assert (REG_P (operands
[1]));
13854 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
13855 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
13857 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
13859 strcpy (&pattern
[p
], "}");
13861 output_asm_insn (pattern
, operands
);
13866 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13867 number of bytes pushed. */
13870 vfp_emit_fstmd (int base_reg
, int count
)
13877 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13878 register pairs are stored by a store multiple insn. We avoid this
13879 by pushing an extra pair. */
13880 if (count
== 2 && !arm_arch6
)
13882 if (base_reg
== LAST_VFP_REGNUM
- 3)
13887 /* FSTMD may not store more than 16 doubleword registers at once. Split
13888 larger stores into multiple parts (up to a maximum of two, in
13893 /* NOTE: base_reg is an internal register number, so each D register
13895 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
13896 saved
+= vfp_emit_fstmd (base_reg
, 16);
13900 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
13901 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
13903 reg
= gen_rtx_REG (DFmode
, base_reg
);
13906 XVECEXP (par
, 0, 0)
13907 = gen_rtx_SET (VOIDmode
,
13910 gen_rtx_PRE_MODIFY (Pmode
,
13913 (Pmode
, stack_pointer_rtx
,
13916 gen_rtx_UNSPEC (BLKmode
,
13917 gen_rtvec (1, reg
),
13918 UNSPEC_PUSH_MULT
));
13920 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13921 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
13922 RTX_FRAME_RELATED_P (tmp
) = 1;
13923 XVECEXP (dwarf
, 0, 0) = tmp
;
13925 tmp
= gen_rtx_SET (VOIDmode
,
13926 gen_frame_mem (DFmode
, stack_pointer_rtx
),
13928 RTX_FRAME_RELATED_P (tmp
) = 1;
13929 XVECEXP (dwarf
, 0, 1) = tmp
;
13931 for (i
= 1; i
< count
; i
++)
13933 reg
= gen_rtx_REG (DFmode
, base_reg
);
13935 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
13937 tmp
= gen_rtx_SET (VOIDmode
,
13938 gen_frame_mem (DFmode
,
13939 plus_constant (Pmode
,
13943 RTX_FRAME_RELATED_P (tmp
) = 1;
13944 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
13947 par
= emit_insn (par
);
13948 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
13949 RTX_FRAME_RELATED_P (par
) = 1;
13954 /* Emit a call instruction with pattern PAT. ADDR is the address of
13955 the call target. */
13958 arm_emit_call_insn (rtx pat
, rtx addr
)
13962 insn
= emit_call_insn (pat
);
13964 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13965 If the call might use such an entry, add a use of the PIC register
13966 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13967 if (TARGET_VXWORKS_RTP
13969 && GET_CODE (addr
) == SYMBOL_REF
13970 && (SYMBOL_REF_DECL (addr
)
13971 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
13972 : !SYMBOL_REF_LOCAL_P (addr
)))
13974 require_pic_register ();
13975 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
13979 /* Output a 'call' insn. */
13981 output_call (rtx
*operands
)
13983 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
13985 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13986 if (REGNO (operands
[0]) == LR_REGNUM
)
13988 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
13989 output_asm_insn ("mov%?\t%0, %|lr", operands
);
13992 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
13994 if (TARGET_INTERWORK
|| arm_arch4t
)
13995 output_asm_insn ("bx%?\t%0", operands
);
13997 output_asm_insn ("mov%?\t%|pc, %0", operands
);
14002 /* Output a 'call' insn that is a reference in memory. This is
14003 disabled for ARMv5 and we prefer a blx instead because otherwise
14004 there's a significant performance overhead. */
14006 output_call_mem (rtx
*operands
)
14008 gcc_assert (!arm_arch5
);
14009 if (TARGET_INTERWORK
)
14011 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
14012 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
14013 output_asm_insn ("bx%?\t%|ip", operands
);
14015 else if (regno_use_in (LR_REGNUM
, operands
[0]))
14017 /* LR is used in the memory address. We load the address in the
14018 first instruction. It's safe to use IP as the target of the
14019 load since the call will kill it anyway. */
14020 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
14021 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
14023 output_asm_insn ("bx%?\t%|ip", operands
);
14025 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
14029 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
14030 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
14037 /* Output a move from arm registers to arm registers of a long double
14038 OPERANDS[0] is the destination.
14039 OPERANDS[1] is the source. */
14041 output_mov_long_double_arm_from_arm (rtx
*operands
)
14043 /* We have to be careful here because the two might overlap. */
14044 int dest_start
= REGNO (operands
[0]);
14045 int src_start
= REGNO (operands
[1]);
14049 if (dest_start
< src_start
)
14051 for (i
= 0; i
< 3; i
++)
14053 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
14054 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
14055 output_asm_insn ("mov%?\t%0, %1", ops
);
14060 for (i
= 2; i
>= 0; i
--)
14062 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
14063 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
14064 output_asm_insn ("mov%?\t%0, %1", ops
);
14072 arm_emit_movpair (rtx dest
, rtx src
)
14074 /* If the src is an immediate, simplify it. */
14075 if (CONST_INT_P (src
))
14077 HOST_WIDE_INT val
= INTVAL (src
);
14078 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
14079 if ((val
>> 16) & 0x0000ffff)
14080 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
14082 GEN_INT ((val
>> 16) & 0x0000ffff));
14085 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
14086 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
14089 /* Output a move between double words. It must be REG<-MEM
14092 output_move_double (rtx
*operands
, bool emit
, int *count
)
14094 enum rtx_code code0
= GET_CODE (operands
[0]);
14095 enum rtx_code code1
= GET_CODE (operands
[1]);
14100 /* The only case when this might happen is when
14101 you are looking at the length of a DImode instruction
14102 that has an invalid constant in it. */
14103 if (code0
== REG
&& code1
!= MEM
)
14105 gcc_assert (!emit
);
14112 unsigned int reg0
= REGNO (operands
[0]);
14114 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
14116 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
14118 switch (GET_CODE (XEXP (operands
[1], 0)))
14125 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
14126 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
14128 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
14133 gcc_assert (TARGET_LDRD
);
14135 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
14142 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
14144 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
14152 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
14154 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
14159 gcc_assert (TARGET_LDRD
);
14161 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
14166 /* Autoicrement addressing modes should never have overlapping
14167 base and destination registers, and overlapping index registers
14168 are already prohibited, so this doesn't need to worry about
14170 otherops
[0] = operands
[0];
14171 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
14172 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
14174 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
14176 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
14178 /* Registers overlap so split out the increment. */
14181 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
14182 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
14189 /* Use a single insn if we can.
14190 FIXME: IWMMXT allows offsets larger than ldrd can
14191 handle, fix these up with a pair of ldr. */
14193 || !CONST_INT_P (otherops
[2])
14194 || (INTVAL (otherops
[2]) > -256
14195 && INTVAL (otherops
[2]) < 256))
14198 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
14204 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
14205 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
14215 /* Use a single insn if we can.
14216 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14217 fix these up with a pair of ldr. */
14219 || !CONST_INT_P (otherops
[2])
14220 || (INTVAL (otherops
[2]) > -256
14221 && INTVAL (otherops
[2]) < 256))
14224 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
14230 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
14231 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
14241 /* We might be able to use ldrd %0, %1 here. However the range is
14242 different to ldr/adr, and it is broken on some ARMv7-M
14243 implementations. */
14244 /* Use the second register of the pair to avoid problematic
14246 otherops
[1] = operands
[1];
14248 output_asm_insn ("adr%?\t%0, %1", otherops
);
14249 operands
[1] = otherops
[0];
14253 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
14255 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
14262 /* ??? This needs checking for thumb2. */
14264 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
14265 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
14267 otherops
[0] = operands
[0];
14268 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
14269 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
14271 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
14273 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
14275 switch ((int) INTVAL (otherops
[2]))
14279 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
14285 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
14291 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
14295 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
14296 operands
[1] = otherops
[0];
14298 && (REG_P (otherops
[2])
14300 || (CONST_INT_P (otherops
[2])
14301 && INTVAL (otherops
[2]) > -256
14302 && INTVAL (otherops
[2]) < 256)))
14304 if (reg_overlap_mentioned_p (operands
[0],
14308 /* Swap base and index registers over to
14309 avoid a conflict. */
14311 otherops
[1] = otherops
[2];
14314 /* If both registers conflict, it will usually
14315 have been fixed by a splitter. */
14316 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
14317 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
14321 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
14322 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
14329 otherops
[0] = operands
[0];
14331 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
14336 if (CONST_INT_P (otherops
[2]))
14340 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
14341 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
14343 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
14349 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
14355 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
14362 return "ldr%(d%)\t%0, [%1]";
14364 return "ldm%(ia%)\t%1, %M0";
14368 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
14369 /* Take care of overlapping base/data reg. */
14370 if (reg_mentioned_p (operands
[0], operands
[1]))
14374 output_asm_insn ("ldr%?\t%0, %1", otherops
);
14375 output_asm_insn ("ldr%?\t%0, %1", operands
);
14385 output_asm_insn ("ldr%?\t%0, %1", operands
);
14386 output_asm_insn ("ldr%?\t%0, %1", otherops
);
14396 /* Constraints should ensure this. */
14397 gcc_assert (code0
== MEM
&& code1
== REG
);
14398 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
14400 switch (GET_CODE (XEXP (operands
[0], 0)))
14406 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
14408 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
14413 gcc_assert (TARGET_LDRD
);
14415 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
14422 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
14424 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
14432 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
14434 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
14439 gcc_assert (TARGET_LDRD
);
14441 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
14446 otherops
[0] = operands
[1];
14447 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
14448 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
14450 /* IWMMXT allows offsets larger than ldrd can handle,
14451 fix these up with a pair of ldr. */
14453 && CONST_INT_P (otherops
[2])
14454 && (INTVAL(otherops
[2]) <= -256
14455 || INTVAL(otherops
[2]) >= 256))
14457 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
14461 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
14462 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
14471 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
14472 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
14478 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
14481 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
14486 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
14491 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
14492 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
14494 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
14498 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
14505 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
14512 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
14517 && (REG_P (otherops
[2])
14519 || (CONST_INT_P (otherops
[2])
14520 && INTVAL (otherops
[2]) > -256
14521 && INTVAL (otherops
[2]) < 256)))
14523 otherops
[0] = operands
[1];
14524 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
14526 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
14532 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
14533 otherops
[1] = operands
[1];
14536 output_asm_insn ("str%?\t%1, %0", operands
);
14537 output_asm_insn ("str%?\t%H1, %0", otherops
);
14547 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14548 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14551 output_move_quad (rtx
*operands
)
14553 if (REG_P (operands
[0]))
14555 /* Load, or reg->reg move. */
14557 if (MEM_P (operands
[1]))
14559 switch (GET_CODE (XEXP (operands
[1], 0)))
14562 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
14567 output_asm_insn ("adr%?\t%0, %1", operands
);
14568 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
14572 gcc_unreachable ();
14580 gcc_assert (REG_P (operands
[1]));
14582 dest
= REGNO (operands
[0]);
14583 src
= REGNO (operands
[1]);
14585 /* This seems pretty dumb, but hopefully GCC won't try to do it
14588 for (i
= 0; i
< 4; i
++)
14590 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
14591 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
14592 output_asm_insn ("mov%?\t%0, %1", ops
);
14595 for (i
= 3; i
>= 0; i
--)
14597 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
14598 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
14599 output_asm_insn ("mov%?\t%0, %1", ops
);
14605 gcc_assert (MEM_P (operands
[0]));
14606 gcc_assert (REG_P (operands
[1]));
14607 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
14609 switch (GET_CODE (XEXP (operands
[0], 0)))
14612 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
14616 gcc_unreachable ();
14623 /* Output a VFP load or store instruction. */
14626 output_move_vfp (rtx
*operands
)
14628 rtx reg
, mem
, addr
, ops
[2];
14629 int load
= REG_P (operands
[0]);
14630 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
14631 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
14634 enum machine_mode mode
;
14636 reg
= operands
[!load
];
14637 mem
= operands
[load
];
14639 mode
= GET_MODE (reg
);
14641 gcc_assert (REG_P (reg
));
14642 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
14643 gcc_assert (mode
== SFmode
14647 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
14648 gcc_assert (MEM_P (mem
));
14650 addr
= XEXP (mem
, 0);
14652 switch (GET_CODE (addr
))
14655 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14656 ops
[0] = XEXP (addr
, 0);
14661 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
14662 ops
[0] = XEXP (addr
, 0);
14667 templ
= "f%s%c%%?\t%%%s0, %%1%s";
14673 sprintf (buff
, templ
,
14674 load
? "ld" : "st",
14677 integer_p
? "\t%@ int" : "");
14678 output_asm_insn (buff
, ops
);
14683 /* Output a Neon double-word or quad-word load or store, or a load
14684 or store for larger structure modes.
14686 WARNING: The ordering of elements is weird in big-endian mode,
14687 because the EABI requires that vectors stored in memory appear
14688 as though they were stored by a VSTM, as required by the EABI.
14689 GCC RTL defines element ordering based on in-memory order.
14690 This can be different from the architectural ordering of elements
14691 within a NEON register. The intrinsics defined in arm_neon.h use the
14692 NEON register element ordering, not the GCC RTL element ordering.
14694 For example, the in-memory ordering of a big-endian a quadword
14695 vector with 16-bit elements when stored from register pair {d0,d1}
14696 will be (lowest address first, d0[N] is NEON register element N):
14698 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14700 When necessary, quadword registers (dN, dN+1) are moved to ARM
14701 registers from rN in the order:
14703 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14705 So that STM/LDM can be used on vectors in ARM registers, and the
14706 same memory layout will result as if VSTM/VLDM were used.
14708 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
14709 possible, which allows use of appropriate alignment tags.
14710 Note that the choice of "64" is independent of the actual vector
14711 element size; this size simply ensures that the behavior is
14712 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
14714 Due to limitations of those instructions, use of VST1.64/VLD1.64
14715 is not possible if:
14716 - the address contains PRE_DEC, or
14717 - the mode refers to more than 4 double-word registers
14719 In those cases, it would be possible to replace VSTM/VLDM by a
14720 sequence of instructions; this is not currently implemented since
14721 this is not certain to actually improve performance. */
14724 output_move_neon (rtx
*operands
)
14726 rtx reg
, mem
, addr
, ops
[2];
14727 int regno
, nregs
, load
= REG_P (operands
[0]);
14730 enum machine_mode mode
;
14732 reg
= operands
[!load
];
14733 mem
= operands
[load
];
14735 mode
= GET_MODE (reg
);
14737 gcc_assert (REG_P (reg
));
14738 regno
= REGNO (reg
);
14739 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
14740 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
14741 || NEON_REGNO_OK_FOR_QUAD (regno
));
14742 gcc_assert (VALID_NEON_DREG_MODE (mode
)
14743 || VALID_NEON_QREG_MODE (mode
)
14744 || VALID_NEON_STRUCT_MODE (mode
));
14745 gcc_assert (MEM_P (mem
));
14747 addr
= XEXP (mem
, 0);
14749 /* Strip off const from addresses like (const (plus (...))). */
14750 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
14751 addr
= XEXP (addr
, 0);
14753 switch (GET_CODE (addr
))
14756 /* We have to use vldm / vstm for too-large modes. */
14759 templ
= "v%smia%%?\t%%0!, %%h1";
14760 ops
[0] = XEXP (addr
, 0);
14764 templ
= "v%s1.64\t%%h1, %%A0";
14771 /* We have to use vldm / vstm in this case, since there is no
14772 pre-decrement form of the vld1 / vst1 instructions. */
14773 templ
= "v%smdb%%?\t%%0!, %%h1";
14774 ops
[0] = XEXP (addr
, 0);
14779 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14780 gcc_unreachable ();
14787 for (i
= 0; i
< nregs
; i
++)
14789 /* We're only using DImode here because it's a convenient size. */
14790 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
14791 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
14792 if (reg_overlap_mentioned_p (ops
[0], mem
))
14794 gcc_assert (overlap
== -1);
14799 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
14800 output_asm_insn (buff
, ops
);
14805 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
14806 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
14807 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
14808 output_asm_insn (buff
, ops
);
14815 /* We have to use vldm / vstm for too-large modes. */
14817 templ
= "v%smia%%?\t%%m0, %%h1";
14819 templ
= "v%s1.64\t%%h1, %%A0";
14825 sprintf (buff
, templ
, load
? "ld" : "st");
14826 output_asm_insn (buff
, ops
);
14831 /* Compute and return the length of neon_mov<mode>, where <mode> is
14832 one of VSTRUCT modes: EI, OI, CI or XI. */
14834 arm_attr_length_move_neon (rtx insn
)
14836 rtx reg
, mem
, addr
;
14838 enum machine_mode mode
;
14840 extract_insn_cached (insn
);
14842 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
14844 mode
= GET_MODE (recog_data
.operand
[0]);
14855 gcc_unreachable ();
14859 load
= REG_P (recog_data
.operand
[0]);
14860 reg
= recog_data
.operand
[!load
];
14861 mem
= recog_data
.operand
[load
];
14863 gcc_assert (MEM_P (mem
));
14865 mode
= GET_MODE (reg
);
14866 addr
= XEXP (mem
, 0);
14868 /* Strip off const from addresses like (const (plus (...))). */
14869 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
14870 addr
= XEXP (addr
, 0);
14872 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
14874 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
14881 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14885 arm_address_offset_is_imm (rtx insn
)
14889 extract_insn_cached (insn
);
14891 if (REG_P (recog_data
.operand
[0]))
14894 mem
= recog_data
.operand
[0];
14896 gcc_assert (MEM_P (mem
));
14898 addr
= XEXP (mem
, 0);
14901 || (GET_CODE (addr
) == PLUS
14902 && REG_P (XEXP (addr
, 0))
14903 && CONST_INT_P (XEXP (addr
, 1))))
14909 /* Output an ADD r, s, #n where n may be too big for one instruction.
14910 If adding zero to one register, output nothing. */
14912 output_add_immediate (rtx
*operands
)
14914 HOST_WIDE_INT n
= INTVAL (operands
[2]);
14916 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
14919 output_multi_immediate (operands
,
14920 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14923 output_multi_immediate (operands
,
14924 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14931 /* Output a multiple immediate operation.
14932 OPERANDS is the vector of operands referred to in the output patterns.
14933 INSTR1 is the output pattern to use for the first constant.
14934 INSTR2 is the output pattern to use for subsequent constants.
14935 IMMED_OP is the index of the constant slot in OPERANDS.
14936 N is the constant value. */
14937 static const char *
14938 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
14939 int immed_op
, HOST_WIDE_INT n
)
14941 #if HOST_BITS_PER_WIDE_INT > 32
14947 /* Quick and easy output. */
14948 operands
[immed_op
] = const0_rtx
;
14949 output_asm_insn (instr1
, operands
);
14954 const char * instr
= instr1
;
14956 /* Note that n is never zero here (which would give no output). */
14957 for (i
= 0; i
< 32; i
+= 2)
14961 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
14962 output_asm_insn (instr
, operands
);
14972 /* Return the name of a shifter operation. */
14973 static const char *
14974 arm_shift_nmem(enum rtx_code code
)
14979 return ARM_LSL_NAME
;
14995 /* Return the appropriate ARM instruction for the operation code.
14996 The returned result should not be overwritten. OP is the rtx of the
14997 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15000 arithmetic_instr (rtx op
, int shift_first_arg
)
15002 switch (GET_CODE (op
))
15008 return shift_first_arg
? "rsb" : "sub";
15023 return arm_shift_nmem(GET_CODE(op
));
15026 gcc_unreachable ();
15030 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15031 for the operation code. The returned result should not be overwritten.
15032 OP is the rtx code of the shift.
15033 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
15035 static const char *
15036 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
15039 enum rtx_code code
= GET_CODE (op
);
15041 switch (GET_CODE (XEXP (op
, 1)))
15049 *amountp
= INTVAL (XEXP (op
, 1));
15053 gcc_unreachable ();
15059 gcc_assert (*amountp
!= -1);
15060 *amountp
= 32 - *amountp
;
15063 /* Fall through. */
15069 mnem
= arm_shift_nmem(code
);
15073 /* We never have to worry about the amount being other than a
15074 power of 2, since this case can never be reloaded from a reg. */
15075 gcc_assert (*amountp
!= -1);
15076 *amountp
= int_log2 (*amountp
);
15077 return ARM_LSL_NAME
;
15080 gcc_unreachable ();
15083 if (*amountp
!= -1)
15085 /* This is not 100% correct, but follows from the desire to merge
15086 multiplication by a power of 2 with the recognizer for a
15087 shift. >=32 is not a valid shift for "lsl", so we must try and
15088 output a shift that produces the correct arithmetical result.
15089 Using lsr #32 is identical except for the fact that the carry bit
15090 is not set correctly if we set the flags; but we never use the
15091 carry bit from such an operation, so we can ignore that. */
15092 if (code
== ROTATERT
)
15093 /* Rotate is just modulo 32. */
15095 else if (*amountp
!= (*amountp
& 31))
15097 if (code
== ASHIFT
)
15102 /* Shifts of 0 are no-ops. */
15110 /* Obtain the shift from the POWER of two. */
15112 static HOST_WIDE_INT
15113 int_log2 (HOST_WIDE_INT power
)
15115 HOST_WIDE_INT shift
= 0;
15117 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
15119 gcc_assert (shift
<= 31);
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Break over-long strings into multiple .ascii directives.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  /* Non-printable characters are emitted as octal escapes.  */
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
15174 /* Compute the register save mask for registers 0 through 12
15175 inclusive. This code is used by arm_compute_save_reg_mask. */
15177 static unsigned long
15178 arm_compute_save_reg0_reg12_mask (void)
15180 unsigned long func_type
= arm_current_func_type ();
15181 unsigned long save_reg_mask
= 0;
15184 if (IS_INTERRUPT (func_type
))
15186 unsigned int max_reg
;
15187 /* Interrupt functions must not corrupt any registers,
15188 even call clobbered ones. If this is a leaf function
15189 we can just examine the registers used by the RTL, but
15190 otherwise we have to assume that whatever function is
15191 called might clobber anything, and so we have to save
15192 all the call-clobbered registers as well. */
15193 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
15194 /* FIQ handlers have registers r8 - r12 banked, so
15195 we only need to check r0 - r7, Normal ISRs only
15196 bank r14 and r15, so we must check up to r12.
15197 r13 is the stack pointer which is always preserved,
15198 so we do not need to consider it here. */
15203 for (reg
= 0; reg
<= max_reg
; reg
++)
15204 if (df_regs_ever_live_p (reg
)
15205 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
15206 save_reg_mask
|= (1 << reg
);
15208 /* Also save the pic base register if necessary. */
15210 && !TARGET_SINGLE_PIC_BASE
15211 && arm_pic_register
!= INVALID_REGNUM
15212 && crtl
->uses_pic_offset_table
)
15213 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
15215 else if (IS_VOLATILE(func_type
))
15217 /* For noreturn functions we historically omitted register saves
15218 altogether. However this really messes up debugging. As a
15219 compromise save just the frame pointers. Combined with the link
15220 register saved elsewhere this should be sufficient to get
15222 if (frame_pointer_needed
)
15223 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
15224 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
15225 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
15226 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
15227 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
15231 /* In the normal case we only need to save those registers
15232 which are call saved and which are used by this function. */
15233 for (reg
= 0; reg
<= 11; reg
++)
15234 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
15235 save_reg_mask
|= (1 << reg
);
15237 /* Handle the frame pointer as a special case. */
15238 if (frame_pointer_needed
)
15239 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
15241 /* If we aren't loading the PIC register,
15242 don't stack it even though it may be live. */
15244 && !TARGET_SINGLE_PIC_BASE
15245 && arm_pic_register
!= INVALID_REGNUM
15246 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
15247 || crtl
->uses_pic_offset_table
))
15248 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
15250 /* The prologue will copy SP into R0, so save it. */
15251 if (IS_STACKALIGN (func_type
))
15252 save_reg_mask
|= 1;
15255 /* Save registers so the exception handler can modify them. */
15256 if (crtl
->calls_eh_return
)
15262 reg
= EH_RETURN_DATA_REGNO (i
);
15263 if (reg
== INVALID_REGNUM
)
15265 save_reg_mask
|= 1 << reg
;
15269 return save_reg_mask
;
15273 /* Compute the number of bytes used to store the static chain register on the
15274 stack, above the stack frame. We need to know this accurately to get the
15275 alignment of the rest of the stack frame correct. */
15277 static int arm_compute_static_chain_stack_bytes (void)
15279 unsigned long func_type
= arm_current_func_type ();
15280 int static_chain_stack_bytes
= 0;
15282 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
&&
15283 IS_NESTED (func_type
) &&
15284 df_regs_ever_live_p (3) && crtl
->args
.pretend_args_size
== 0)
15285 static_chain_stack_bytes
= 4;
15287 return static_chain_stack_bytes
;
15291 /* Compute a bit mask of which registers need to be
15292 saved on the stack for the current function.
15293 This is used by arm_get_frame_offsets, which may add extra registers. */
15295 static unsigned long
15296 arm_compute_save_reg_mask (void)
15298 unsigned int save_reg_mask
= 0;
15299 unsigned long func_type
= arm_current_func_type ();
15302 if (IS_NAKED (func_type
))
15303 /* This should never really happen. */
15306 /* If we are creating a stack frame, then we must save the frame pointer,
15307 IP (which will hold the old stack pointer), LR and the PC. */
15308 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
15310 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
15313 | (1 << PC_REGNUM
);
15315 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
15317 /* Decide if we need to save the link register.
15318 Interrupt routines have their own banked link register,
15319 so they never need to save it.
15320 Otherwise if we do not use the link register we do not need to save
15321 it. If we are pushing other registers onto the stack however, we
15322 can save an instruction in the epilogue by pushing the link register
15323 now and then popping it back into the PC. This incurs extra memory
15324 accesses though, so we only do it when optimizing for size, and only
15325 if we know that we will not need a fancy return sequence. */
15326 if (df_regs_ever_live_p (LR_REGNUM
)
15329 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
15330 && !crtl
->calls_eh_return
))
15331 save_reg_mask
|= 1 << LR_REGNUM
;
15333 if (cfun
->machine
->lr_save_eliminated
)
15334 save_reg_mask
&= ~ (1 << LR_REGNUM
);
15336 if (TARGET_REALLY_IWMMXT
15337 && ((bit_count (save_reg_mask
)
15338 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
15339 arm_compute_static_chain_stack_bytes())
15342 /* The total number of registers that are going to be pushed
15343 onto the stack is odd. We need to ensure that the stack
15344 is 64-bit aligned before we start to save iWMMXt registers,
15345 and also before we start to create locals. (A local variable
15346 might be a double or long long which we will load/store using
15347 an iWMMXt instruction). Therefore we need to push another
15348 ARM register, so that the stack will be 64-bit aligned. We
15349 try to avoid using the arg registers (r0 -r3) as they might be
15350 used to pass values in a tail call. */
15351 for (reg
= 4; reg
<= 12; reg
++)
15352 if ((save_reg_mask
& (1 << reg
)) == 0)
15356 save_reg_mask
|= (1 << reg
);
15359 cfun
->machine
->sibcall_blocked
= 1;
15360 save_reg_mask
|= (1 << 3);
15364 /* We may need to push an additional register for use initializing the
15365 PIC base register. */
15366 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
15367 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
15369 reg
= thumb_find_work_register (1 << 4);
15370 if (!call_used_regs
[reg
])
15371 save_reg_mask
|= (1 << reg
);
15374 return save_reg_mask
;
15378 /* Compute a bit mask of which registers need to be
15379 saved on the stack for the current function. */
15380 static unsigned long
15381 thumb1_compute_save_reg_mask (void)
15383 unsigned long mask
;
15387 for (reg
= 0; reg
< 12; reg
++)
15388 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15392 && !TARGET_SINGLE_PIC_BASE
15393 && arm_pic_register
!= INVALID_REGNUM
15394 && crtl
->uses_pic_offset_table
)
15395 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
15397 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15398 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
15399 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
15401 /* LR will also be pushed if any lo regs are pushed. */
15402 if (mask
& 0xff || thumb_force_lr_save ())
15403 mask
|= (1 << LR_REGNUM
);
15405 /* Make sure we have a low work register if we need one.
15406 We will need one if we are going to push a high register,
15407 but we are not currently intending to push a low register. */
15408 if ((mask
& 0xff) == 0
15409 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
15411 /* Use thumb_find_work_register to choose which register
15412 we will use. If the register is live then we will
15413 have to push it. Use LAST_LO_REGNUM as our fallback
15414 choice for the register to select. */
15415 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
15416 /* Make sure the register returned by thumb_find_work_register is
15417 not part of the return value. */
15418 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
15419 reg
= LAST_LO_REGNUM
;
15421 if (! call_used_regs
[reg
])
15425 /* The 504 below is 8 bytes less than 512 because there are two possible
15426 alignment words. We can't tell here if they will be present or not so we
15427 have to play it safe and assume that they are. */
15428 if ((CALLER_INTERWORKING_SLOT_SIZE
+
15429 ROUND_UP_WORD (get_frame_size ()) +
15430 crtl
->outgoing_args_size
) >= 504)
15432 /* This is the same as the code in thumb1_expand_prologue() which
15433 determines which register to use for stack decrement. */
15434 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
15435 if (mask
& (1 << reg
))
15438 if (reg
> LAST_LO_REGNUM
)
15440 /* Make sure we have a register available for stack decrement. */
15441 mask
|= 1 << LAST_LO_REGNUM
;
15449 /* Return the number of bytes required to save VFP registers. */
15451 arm_get_vfp_saved_size (void)
15453 unsigned int regno
;
15458 /* Space for saved VFP registers. */
15459 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15462 for (regno
= FIRST_VFP_REGNUM
;
15463 regno
< LAST_VFP_REGNUM
;
15466 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
15467 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
15471 /* Workaround ARM10 VFPr1 bug. */
15472 if (count
== 2 && !arm_arch6
)
15474 saved
+= count
* 8;
15483 if (count
== 2 && !arm_arch6
)
15485 saved
+= count
* 8;
15492 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15493 everything bar the final return instruction. If simple_return is true,
15494 then do not output epilogue, because it has already been emitted in RTL. */
15496 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
15497 bool simple_return
)
15499 char conditional
[10];
15502 unsigned long live_regs_mask
;
15503 unsigned long func_type
;
15504 arm_stack_offsets
*offsets
;
15506 func_type
= arm_current_func_type ();
15508 if (IS_NAKED (func_type
))
15511 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
15513 /* If this function was declared non-returning, and we have
15514 found a tail call, then we have to trust that the called
15515 function won't return. */
15520 /* Otherwise, trap an attempted return by aborting. */
15522 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
15524 assemble_external_libcall (ops
[1]);
15525 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
15531 gcc_assert (!cfun
->calls_alloca
|| really_return
);
15533 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
15535 cfun
->machine
->return_used_this_function
= 1;
15537 offsets
= arm_get_frame_offsets ();
15538 live_regs_mask
= offsets
->saved_regs_mask
;
15540 if (!simple_return
&& live_regs_mask
)
15542 const char * return_reg
;
15544 /* If we do not have any special requirements for function exit
15545 (e.g. interworking) then we can load the return address
15546 directly into the PC. Otherwise we must load it into LR. */
15548 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
15549 return_reg
= reg_names
[PC_REGNUM
];
15551 return_reg
= reg_names
[LR_REGNUM
];
15553 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
15555 /* There are three possible reasons for the IP register
15556 being saved. 1) a stack frame was created, in which case
15557 IP contains the old stack pointer, or 2) an ISR routine
15558 corrupted it, or 3) it was saved to align the stack on
15559 iWMMXt. In case 1, restore IP into SP, otherwise just
15561 if (frame_pointer_needed
)
15563 live_regs_mask
&= ~ (1 << IP_REGNUM
);
15564 live_regs_mask
|= (1 << SP_REGNUM
);
15567 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
15570 /* On some ARM architectures it is faster to use LDR rather than
15571 LDM to load a single register. On other architectures, the
15572 cost is the same. In 26 bit mode, or for exception handlers,
15573 we have to use LDM to load the PC so that the CPSR is also
15575 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
15576 if (live_regs_mask
== (1U << reg
))
15579 if (reg
<= LAST_ARM_REGNUM
15580 && (reg
!= LR_REGNUM
15582 || ! IS_INTERRUPT (func_type
)))
15584 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
15585 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
15592 /* Generate the load multiple instruction to restore the
15593 registers. Note we can get here, even if
15594 frame_pointer_needed is true, but only if sp already
15595 points to the base of the saved core registers. */
15596 if (live_regs_mask
& (1 << SP_REGNUM
))
15598 unsigned HOST_WIDE_INT stack_adjust
;
15600 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
15601 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
15603 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
15604 if (TARGET_UNIFIED_ASM
)
15605 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
15607 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
15610 /* If we can't use ldmib (SA110 bug),
15611 then try to pop r3 instead. */
15613 live_regs_mask
|= 1 << 3;
15615 if (TARGET_UNIFIED_ASM
)
15616 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
15618 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
15622 if (TARGET_UNIFIED_ASM
)
15623 sprintf (instr
, "pop%s\t{", conditional
);
15625 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
15627 p
= instr
+ strlen (instr
);
15629 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
15630 if (live_regs_mask
& (1 << reg
))
15632 int l
= strlen (reg_names
[reg
]);
15638 memcpy (p
, ", ", 2);
15642 memcpy (p
, "%|", 2);
15643 memcpy (p
+ 2, reg_names
[reg
], l
);
15647 if (live_regs_mask
& (1 << LR_REGNUM
))
15649 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
15650 /* If returning from an interrupt, restore the CPSR. */
15651 if (IS_INTERRUPT (func_type
))
15658 output_asm_insn (instr
, & operand
);
15660 /* See if we need to generate an extra instruction to
15661 perform the actual function return. */
15663 && func_type
!= ARM_FT_INTERWORKED
15664 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
15666 /* The return has already been handled
15667 by loading the LR into the PC. */
15674 switch ((int) ARM_FUNC_TYPE (func_type
))
15678 /* ??? This is wrong for unified assembly syntax. */
15679 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
15682 case ARM_FT_INTERWORKED
:
15683 sprintf (instr
, "bx%s\t%%|lr", conditional
);
15686 case ARM_FT_EXCEPTION
:
15687 /* ??? This is wrong for unified assembly syntax. */
15688 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
15692 /* Use bx if it's available. */
15693 if (arm_arch5
|| arm_arch4t
)
15694 sprintf (instr
, "bx%s\t%%|lr", conditional
);
15696 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
15700 output_asm_insn (instr
, & operand
);
15706 /* Write the function name into the code section, directly preceding
15707 the function prologue.
15709 Code will be output similar to this:
15711 .ascii "arm_poke_function_name", 0
15714 .word 0xff000000 + (t1 - t0)
15715 arm_poke_function_name
15717 stmfd sp!, {fp, ip, lr, pc}
15720 When performing a stack backtrace, code can inspect the value
15721 of 'pc' stored at 'fp' + 0. If the trace function then looks
15722 at location pc - 12 and the top 8 bits are set, then we know
15723 that there is a function name embedded immediately preceding this
15724 location and has length ((pc[-3]) & 0xff000000).
15726 We assume that pc is declared as a pointer to an unsigned long.
15728 It is of no benefit to output the function name if we are assembling
15729 a leaf function. These function types will not contain a stack
15730 backtrace structure, therefore it is not possible to determine the
15733 arm_poke_function_name (FILE *stream
, const char *name
)
15735 unsigned long alignlength
;
15736 unsigned long length
;
15739 length
= strlen (name
) + 1;
15740 alignlength
= ROUND_UP_WORD (length
);
15742 ASM_OUTPUT_ASCII (stream
, name
, length
);
15743 ASM_OUTPUT_ALIGN (stream
, 2);
15744 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
15745 assemble_aligned_integer (UNITS_PER_WORD
, x
);
15748 /* Place some comments into the assembler stream
15749 describing the current function. */
15751 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
15753 unsigned long func_type
;
15755 /* ??? Do we want to print some of the below anyway? */
15759 /* Sanity check. */
15760 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
15762 func_type
= arm_current_func_type ();
15764 switch ((int) ARM_FUNC_TYPE (func_type
))
15767 case ARM_FT_NORMAL
:
15769 case ARM_FT_INTERWORKED
:
15770 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
15773 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
15776 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
15778 case ARM_FT_EXCEPTION
:
15779 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
15783 if (IS_NAKED (func_type
))
15784 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15786 if (IS_VOLATILE (func_type
))
15787 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
15789 if (IS_NESTED (func_type
))
15790 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
15791 if (IS_STACKALIGN (func_type
))
15792 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15794 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15796 crtl
->args
.pretend_args_size
, frame_size
);
15798 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15799 frame_pointer_needed
,
15800 cfun
->machine
->uses_anonymous_args
);
15802 if (cfun
->machine
->lr_save_eliminated
)
15803 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
15805 if (crtl
->calls_eh_return
)
15806 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
15811 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
15812 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
15814 arm_stack_offsets
*offsets
;
15820 /* Emit any call-via-reg trampolines that are needed for v4t support
15821 of call_reg and call_value_reg type insns. */
15822 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
15824 rtx label
= cfun
->machine
->call_via
[regno
];
15828 switch_to_section (function_section (current_function_decl
));
15829 targetm
.asm_out
.internal_label (asm_out_file
, "L",
15830 CODE_LABEL_NUMBER (label
));
15831 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
15835 /* ??? Probably not safe to set this here, since it assumes that a
15836 function will be emitted as assembly immediately after we generate
15837 RTL for it. This does not happen for inline functions. */
15838 cfun
->machine
->return_used_this_function
= 0;
15840 else /* TARGET_32BIT */
15842 /* We need to take into account any stack-frame rounding. */
15843 offsets
= arm_get_frame_offsets ();
15845 gcc_assert (!use_return_insn (FALSE
, NULL
)
15846 || (cfun
->machine
->return_used_this_function
!= 0)
15847 || offsets
->saved_regs
== offsets
->outgoing_args
15848 || frame_pointer_needed
);
15850 /* Reset the ARM-specific per-function variables. */
15851 after_arm_reorg
= 0;
15855 /* Generate and emit a pattern that will be recognized as STRD pattern. If even
15856 number of registers are being pushed, multiple STRD patterns are created for
15857 all register pairs. If odd number of registers are pushed, emit a
15858 combination of STRDs and STR for the prologue saves. */
15860 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
15864 rtx par
= NULL_RTX
;
15865 rtx insn
= NULL_RTX
;
15866 rtx dwarf
= NULL_RTX
;
15867 rtx tmp
, reg
, tmp1
;
15869 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15870 if (saved_regs_mask
& (1 << i
))
15873 gcc_assert (num_regs
&& num_regs
<= 16);
15875 /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
15876 registers to push. */
15877 tmp
= gen_rtx_SET (VOIDmode
,
15879 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
15880 RTX_FRAME_RELATED_P (tmp
) = 1;
15881 insn
= emit_insn (tmp
);
15883 /* Create sequence for DWARF info. */
15884 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
15886 /* RTLs cannot be shared, hence create new copy for dwarf. */
15887 tmp1
= gen_rtx_SET (VOIDmode
,
15889 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
15890 RTX_FRAME_RELATED_P (tmp1
) = 1;
15891 XVECEXP (dwarf
, 0, 0) = tmp1
;
15893 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
15894 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
15896 /* Var j iterates over all the registers to gather all the registers in
15897 saved_regs_mask. Var i gives index of register R_j in stack frame.
15898 A PARALLEL RTX of register-pair is created here, so that pattern for
15899 STRD can be matched. If num_regs is odd, 1st register will be pushed
15900 using STR and remaining registers will be pushed with STRD in pairs.
15901 If num_regs is even, all registers are pushed with STRD in pairs.
15902 Hence, skip first element for odd num_regs. */
15903 for (i
= num_regs
- 1, j
= LAST_ARM_REGNUM
; i
>= (num_regs
% 2); j
--)
15904 if (saved_regs_mask
& (1 << j
))
15906 /* Create RTX for store. New RTX is created for dwarf as
15907 they are not sharable. */
15908 reg
= gen_rtx_REG (SImode
, j
);
15909 tmp
= gen_rtx_SET (SImode
,
15912 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
)),
15915 tmp1
= gen_rtx_SET (SImode
,
15918 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
)),
15920 RTX_FRAME_RELATED_P (tmp
) = 1;
15921 RTX_FRAME_RELATED_P (tmp1
) = 1;
15923 if (((i
- (num_regs
% 2)) % 2) == 1)
15924 /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
15925 be created. Hence create it first. The STRD pattern we are
15927 [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
15928 (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
15929 where the target registers need not be consecutive. */
15930 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
15932 /* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is
15933 even, the reg_j is added as 0th element and if it is odd, reg_i is
15934 added as 1st element of STRD pattern shown above. */
15935 XVECEXP (par
, 0, ((i
- (num_regs
% 2)) % 2)) = tmp
;
15936 XVECEXP (dwarf
, 0, (i
+ 1)) = tmp1
;
15938 if (((i
- (num_regs
% 2)) % 2) == 0)
15939 /* When (i - (num_regs % 2)) is even, RTXs for both the registers
15940 to be loaded are generated in above given STRD pattern, and the
15941 pattern can be emitted now. */
15947 if ((num_regs
% 2) == 1)
15949 /* If odd number of registers are pushed, generate STR pattern to store
15951 for (; (saved_regs_mask
& (1 << j
)) == 0; j
--);
15953 tmp1
= gen_frame_mem (SImode
, plus_constant (Pmode
,
15954 stack_pointer_rtx
, 4 * i
));
15955 reg
= gen_rtx_REG (SImode
, j
);
15956 tmp
= gen_rtx_SET (SImode
, tmp1
, reg
);
15957 RTX_FRAME_RELATED_P (tmp
) = 1;
15961 tmp1
= gen_rtx_SET (SImode
,
15964 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
)),
15966 RTX_FRAME_RELATED_P (tmp1
) = 1;
15967 XVECEXP (dwarf
, 0, (i
+ 1)) = tmp1
;
15970 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15971 RTX_FRAME_RELATED_P (insn
) = 1;
15975 /* Generate and emit an insn that we will recognize as a push_multi.
15976 Unfortunately, since this insn does not reflect very well the actual
15977 semantics of the operation, we need to annotate the insn for the benefit
15978 of DWARF2 frame unwind information. */
15980 emit_multi_reg_push (unsigned long mask
)
15983 int num_dwarf_regs
;
15987 int dwarf_par_index
;
15990 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15991 if (mask
& (1 << i
))
15994 gcc_assert (num_regs
&& num_regs
<= 16);
15996 /* We don't record the PC in the dwarf frame information. */
15997 num_dwarf_regs
= num_regs
;
15998 if (mask
& (1 << PC_REGNUM
))
16001 /* For the body of the insn we are going to generate an UNSPEC in
16002 parallel with several USEs. This allows the insn to be recognized
16003 by the push_multi pattern in the arm.md file.
16005 The body of the insn looks something like this:
16008 (set (mem:BLK (pre_modify:SI (reg:SI sp)
16009 (const_int:SI <num>)))
16010 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16016 For the frame note however, we try to be more explicit and actually
16017 show each register being stored into the stack frame, plus a (single)
16018 decrement of the stack pointer. We do it this way in order to be
16019 friendly to the stack unwinding code, which only wants to see a single
16020 stack decrement per instruction. The RTL we generate for the note looks
16021 something like this:
16024 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16025 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16026 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16027 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16031 FIXME:: In an ideal world the PRE_MODIFY would not exist and
16032 instead we'd have a parallel expression detailing all
16033 the stores to the various memory addresses so that debug
16034 information is more up-to-date. Remember however while writing
16035 this to take care of the constraints with the push instruction.
16037 Note also that this has to be taken care of for the VFP registers.
16039 For more see PR43399. */
16041 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
16042 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
16043 dwarf_par_index
= 1;
16045 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
16047 if (mask
& (1 << i
))
16049 reg
= gen_rtx_REG (SImode
, i
);
16051 XVECEXP (par
, 0, 0)
16052 = gen_rtx_SET (VOIDmode
,
16055 gen_rtx_PRE_MODIFY (Pmode
,
16058 (Pmode
, stack_pointer_rtx
,
16061 gen_rtx_UNSPEC (BLKmode
,
16062 gen_rtvec (1, reg
),
16063 UNSPEC_PUSH_MULT
));
16065 if (i
!= PC_REGNUM
)
16067 tmp
= gen_rtx_SET (VOIDmode
,
16068 gen_frame_mem (SImode
, stack_pointer_rtx
),
16070 RTX_FRAME_RELATED_P (tmp
) = 1;
16071 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
16079 for (j
= 1, i
++; j
< num_regs
; i
++)
16081 if (mask
& (1 << i
))
16083 reg
= gen_rtx_REG (SImode
, i
);
16085 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
16087 if (i
!= PC_REGNUM
)
16090 = gen_rtx_SET (VOIDmode
,
16093 plus_constant (Pmode
, stack_pointer_rtx
,
16096 RTX_FRAME_RELATED_P (tmp
) = 1;
16097 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
16104 par
= emit_insn (par
);
16106 tmp
= gen_rtx_SET (VOIDmode
,
16108 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
16109 RTX_FRAME_RELATED_P (tmp
) = 1;
16110 XVECEXP (dwarf
, 0, 0) = tmp
;
16112 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
16117 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
16118 SAVED_REGS_MASK shows which registers need to be restored.
16120 Unfortunately, since this insn does not reflect very well the actual
16121 semantics of the operation, we need to annotate the insn for the benefit
16122 of DWARF2 frame unwind information. */
16124 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
16129 rtx dwarf
= NULL_RTX
;
16135 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
16136 offset_adj
= return_in_pc
? 1 : 0;
16137 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
16138 if (saved_regs_mask
& (1 << i
))
16141 gcc_assert (num_regs
&& num_regs
<= 16);
16143 /* If SP is in reglist, then we don't emit SP update insn. */
16144 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
16146 /* The parallel needs to hold num_regs SETs
16147 and one SET for the stack update. */
16148 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
16153 XVECEXP (par
, 0, 0) = tmp
;
16158 /* Increment the stack pointer, based on there being
16159 num_regs 4-byte registers to restore. */
16160 tmp
= gen_rtx_SET (VOIDmode
,
16162 plus_constant (Pmode
,
16165 RTX_FRAME_RELATED_P (tmp
) = 1;
16166 XVECEXP (par
, 0, offset_adj
) = tmp
;
16169 /* Now restore every reg, which may include PC. */
16170 for (j
= 0, i
= 0; j
< num_regs
; i
++)
16171 if (saved_regs_mask
& (1 << i
))
16173 reg
= gen_rtx_REG (SImode
, i
);
16174 tmp
= gen_rtx_SET (VOIDmode
,
16178 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
16179 RTX_FRAME_RELATED_P (tmp
) = 1;
16180 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
16182 /* We need to maintain a sequence for DWARF info too. As dwarf info
16183 should not have PC, skip PC. */
16184 if (i
!= PC_REGNUM
)
16185 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
16191 par
= emit_jump_insn (par
);
16193 par
= emit_insn (par
);
16195 REG_NOTES (par
) = dwarf
;
16198 /* Generate and emit an insn pattern that we will recognize as a pop_multi
16199 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
16201 Unfortunately, since this insn does not reflect very well the actual
16202 semantics of the operation, we need to annotate the insn for the benefit
16203 of DWARF2 frame unwind information. */
16205 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
16209 rtx dwarf
= NULL_RTX
;
16212 gcc_assert (num_regs
&& num_regs
<= 32);
16214 /* Workaround ARM10 VFPr1 bug. */
16215 if (num_regs
== 2 && !arm_arch6
)
16217 if (first_reg
== 15)
16223 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16224 there could be up to 32 D-registers to restore.
16225 If there are more than 16 D-registers, make two recursive calls,
16226 each of which emits one pop_multi instruction. */
16229 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
16230 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
16234 /* The parallel needs to hold num_regs SETs
16235 and one SET for the stack update. */
16236 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
16238 /* Increment the stack pointer, based on there being
16239 num_regs 8-byte registers to restore. */
16240 tmp
= gen_rtx_SET (VOIDmode
,
16242 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
16243 RTX_FRAME_RELATED_P (tmp
) = 1;
16244 XVECEXP (par
, 0, 0) = tmp
;
16246 /* Now show every reg that will be restored, using a SET for each. */
16247 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
16249 reg
= gen_rtx_REG (DFmode
, i
);
16251 tmp
= gen_rtx_SET (VOIDmode
,
16255 plus_constant (Pmode
, base_reg
, 8 * j
)));
16256 RTX_FRAME_RELATED_P (tmp
) = 1;
16257 XVECEXP (par
, 0, j
+ 1) = tmp
;
16259 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
16264 par
= emit_insn (par
);
16265 REG_NOTES (par
) = dwarf
;
16268 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
16269 number of registers are being popped, multiple LDRD patterns are created for
16270 all register pairs. If odd number of registers are popped, last register is
16271 loaded by using LDR pattern. */
16273 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
16277 rtx par
= NULL_RTX
;
16278 rtx dwarf
= NULL_RTX
;
16279 rtx tmp
, reg
, tmp1
;
16282 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
16283 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
16284 if (saved_regs_mask
& (1 << i
))
16287 gcc_assert (num_regs
&& num_regs
<= 16);
16289 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
16290 to be popped. So, if num_regs is even, now it will become odd,
16291 and we can generate pop with PC. If num_regs is odd, it will be
16292 even now, and ldr with return can be generated for PC. */
16296 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
16298 /* Var j iterates over all the registers to gather all the registers in
16299 saved_regs_mask. Var i gives index of saved registers in stack frame.
16300 A PARALLEL RTX of register-pair is created here, so that pattern for
16301 LDRD can be matched. As PC is always last register to be popped, and
16302 we have already decremented num_regs if PC, we don't have to worry
16303 about PC in this loop. */
16304 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
16305 if (saved_regs_mask
& (1 << j
))
16307 /* Create RTX for memory load. */
16308 reg
= gen_rtx_REG (SImode
, j
);
16309 tmp
= gen_rtx_SET (SImode
,
16311 gen_frame_mem (SImode
,
16312 plus_constant (Pmode
,
16313 stack_pointer_rtx
, 4 * i
)));
16314 RTX_FRAME_RELATED_P (tmp
) = 1;
16318 /* When saved-register index (i) is even, the RTX to be emitted is
16319 yet to be created. Hence create it first. The LDRD pattern we
16320 are generating is :
16321 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
16322 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
16323 where target registers need not be consecutive. */
16324 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
16328 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
16329 added as 0th element and if i is odd, reg_i is added as 1st element
16330 of LDRD pattern shown above. */
16331 XVECEXP (par
, 0, (i
% 2)) = tmp
;
16332 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
16336 /* When saved-register index (i) is odd, RTXs for both the registers
16337 to be loaded are generated in above given LDRD pattern, and the
16338 pattern can be emitted now. */
16339 par
= emit_insn (par
);
16340 REG_NOTES (par
) = dwarf
;
16346 /* If the number of registers pushed is odd AND return_in_pc is false OR
16347 number of registers are even AND return_in_pc is true, last register is
16348 popped using LDR. It can be PC as well. Hence, adjust the stack first and
16349 then LDR with post increment. */
16351 /* Increment the stack pointer, based on there being
16352 num_regs 4-byte registers to restore. */
16353 tmp
= gen_rtx_SET (VOIDmode
,
16355 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
16356 RTX_FRAME_RELATED_P (tmp
) = 1;
16361 if (((num_regs
% 2) == 1 && !return_in_pc
)
16362 || ((num_regs
% 2) == 0 && return_in_pc
))
16364 /* Scan for the single register to be popped. Skip until the saved
16365 register is found. */
16366 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
16368 /* Gen LDR with post increment here. */
16369 tmp1
= gen_rtx_MEM (SImode
,
16370 gen_rtx_POST_INC (SImode
,
16371 stack_pointer_rtx
));
16372 set_mem_alias_set (tmp1
, get_frame_alias_set ());
16374 reg
= gen_rtx_REG (SImode
, j
);
16375 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
16376 RTX_FRAME_RELATED_P (tmp
) = 1;
16377 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
16381 /* If return_in_pc, j must be PC_REGNUM. */
16382 gcc_assert (j
== PC_REGNUM
);
16383 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
16384 XVECEXP (par
, 0, 0) = ret_rtx
;
16385 XVECEXP (par
, 0, 1) = tmp
;
16386 par
= emit_jump_insn (par
);
16390 par
= emit_insn (tmp
);
16393 REG_NOTES (par
) = dwarf
;
16395 else if ((num_regs
% 2) == 1 && return_in_pc
)
16397 /* There are 2 registers to be popped. So, generate the pattern
16398 pop_multiple_with_stack_update_and_return to pop in PC. */
16399 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
16405 /* Calculate the size of the return value that is passed in registers. */
16407 arm_size_return_regs (void)
16409 enum machine_mode mode
;
16411 if (crtl
->return_rtx
!= 0)
16412 mode
= GET_MODE (crtl
->return_rtx
);
16414 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
16416 return GET_MODE_SIZE (mode
);
16419 /* Return true if the current function needs to save/restore LR. */
16421 thumb_force_lr_save (void)
16423 return !cfun
->machine
->lr_save_eliminated
16424 && (!leaf_function_p ()
16425 || thumb_far_jump_used_p ()
16426 || df_regs_ever_live_p (LR_REGNUM
));
16430 /* Return true if r3 is used by any of the tail call insns in the
16431 current function. */
16433 any_sibcall_uses_r3 (void)
16438 if (!crtl
->tail_call_emit
)
16440 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
16441 if (e
->flags
& EDGE_SIBCALL
)
16443 rtx call
= BB_END (e
->src
);
16444 if (!CALL_P (call
))
16445 call
= prev_nonnote_nondebug_insn (call
);
16446 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
16447 if (find_regno_fusage (call
, USE
, 3))
16454 /* Compute the distance from register FROM to register TO.
16455 These can be the arg pointer (26), the soft frame pointer (25),
16456 the stack pointer (13) or the hard frame pointer (11).
16457 In thumb mode r7 is used as the soft frame pointer, if needed.
16458 Typical stack layout looks like this:
16460 old stack pointer -> | |
16463 | | saved arguments for
16464 | | vararg functions
16467 hard FP & arg pointer -> | | \
16475 soft frame pointer -> | | /
16480 locals base pointer -> | | /
16485 current stack pointer -> | | /
16488 For a given function some or all of these stack components
16489 may not be needed, giving rise to the possibility of
16490 eliminating some of the registers.
16492 The values returned by this function must reflect the behavior
16493 of arm_expand_prologue() and arm_compute_save_reg_mask().
16495 The sign of the number returned reflects the direction of stack
16496 growth, so the values are positive for all eliminations except
16497 from the soft frame pointer to the hard frame pointer.
16499 SFP may point just inside the local variables block to ensure correct
16503 /* Calculate stack offsets. These are used to calculate register elimination
16504 offsets and in prologue/epilogue code. Also calculates which registers
16505 should be saved. */
16507 static arm_stack_offsets
*
16508 arm_get_frame_offsets (void)
16510 struct arm_stack_offsets
*offsets
;
16511 unsigned long func_type
;
16515 HOST_WIDE_INT frame_size
;
16518 offsets
= &cfun
->machine
->stack_offsets
;
16520 /* We need to know if we are a leaf function. Unfortunately, it
16521 is possible to be called after start_sequence has been called,
16522 which causes get_insns to return the insns for the sequence,
16523 not the function, which will cause leaf_function_p to return
16524 the incorrect result.
16526 to know about leaf functions once reload has completed, and the
16527 frame size cannot be changed after that time, so we can safely
16528 use the cached value. */
16530 if (reload_completed
)
16533 /* Initially this is the size of the local variables. It will translated
16534 into an offset once we have determined the size of preceding data. */
16535 frame_size
= ROUND_UP_WORD (get_frame_size ());
16537 leaf
= leaf_function_p ();
16539 /* Space for variadic functions. */
16540 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
16542 /* In Thumb mode this is incorrect, but never used. */
16543 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
16544 arm_compute_static_chain_stack_bytes();
16548 unsigned int regno
;
16550 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
16551 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
16552 saved
= core_saved
;
16554 /* We know that SP will be doubleword aligned on entry, and we must
16555 preserve that condition at any subroutine call. We also require the
16556 soft frame pointer to be doubleword aligned. */
16558 if (TARGET_REALLY_IWMMXT
)
16560 /* Check for the call-saved iWMMXt registers. */
16561 for (regno
= FIRST_IWMMXT_REGNUM
;
16562 regno
<= LAST_IWMMXT_REGNUM
;
16564 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
16568 func_type
= arm_current_func_type ();
16569 /* Space for saved VFP registers. */
16570 if (! IS_VOLATILE (func_type
)
16571 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
16572 saved
+= arm_get_vfp_saved_size ();
16574 else /* TARGET_THUMB1 */
16576 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
16577 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
16578 saved
= core_saved
;
16579 if (TARGET_BACKTRACE
)
16583 /* Saved registers include the stack frame. */
16584 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
16585 arm_compute_static_chain_stack_bytes();
16586 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
16587 /* A leaf function does not need any stack alignment if it has nothing
16589 if (leaf
&& frame_size
== 0
16590 /* However if it calls alloca(), we have a dynamically allocated
16591 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16592 && ! cfun
->calls_alloca
)
16594 offsets
->outgoing_args
= offsets
->soft_frame
;
16595 offsets
->locals_base
= offsets
->soft_frame
;
16599 /* Ensure SFP has the correct alignment. */
16600 if (ARM_DOUBLEWORD_ALIGN
16601 && (offsets
->soft_frame
& 7))
16603 offsets
->soft_frame
+= 4;
16604 /* Try to align stack by pushing an extra reg. Don't bother doing this
16605 when there is a stack frame as the alignment will be rolled into
16606 the normal stack adjustment. */
16607 if (frame_size
+ crtl
->outgoing_args_size
== 0)
16611 /* If it is safe to use r3, then do so. This sometimes
16612 generates better code on Thumb-2 by avoiding the need to
16613 use 32-bit push/pop instructions. */
16614 if (! any_sibcall_uses_r3 ()
16615 && arm_size_return_regs () <= 12
16616 && (offsets
->saved_regs_mask
& (1 << 3)) == 0)
16621 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
16623 /* Avoid fixed registers; they may be changed at
16624 arbitrary times so it's unsafe to restore them
16625 during the epilogue. */
16627 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
16636 offsets
->saved_regs
+= 4;
16637 offsets
->saved_regs_mask
|= (1 << reg
);
16642 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
16643 offsets
->outgoing_args
= (offsets
->locals_base
16644 + crtl
->outgoing_args_size
);
16646 if (ARM_DOUBLEWORD_ALIGN
)
16648 /* Ensure SP remains doubleword aligned. */
16649 if (offsets
->outgoing_args
& 7)
16650 offsets
->outgoing_args
+= 4;
16651 gcc_assert (!(offsets
->outgoing_args
& 7));
16658 /* Calculate the relative offsets for the different stack pointers. Positive
16659 offsets are in the direction of stack growth. */
16662 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
16664 arm_stack_offsets
*offsets
;
16666 offsets
= arm_get_frame_offsets ();
16668 /* OK, now we have enough information to compute the distances.
16669 There must be an entry in these switch tables for each pair
16670 of registers in ELIMINABLE_REGS, even if some of the entries
16671 seem to be redundant or useless. */
16674 case ARG_POINTER_REGNUM
:
16677 case THUMB_HARD_FRAME_POINTER_REGNUM
:
16680 case FRAME_POINTER_REGNUM
:
16681 /* This is the reverse of the soft frame pointer
16682 to hard frame pointer elimination below. */
16683 return offsets
->soft_frame
- offsets
->saved_args
;
16685 case ARM_HARD_FRAME_POINTER_REGNUM
:
16686 /* This is only non-zero in the case where the static chain register
16687 is stored above the frame. */
16688 return offsets
->frame
- offsets
->saved_args
- 4;
16690 case STACK_POINTER_REGNUM
:
16691 /* If nothing has been pushed on the stack at all
16692 then this will return -4. This *is* correct! */
16693 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
16696 gcc_unreachable ();
16698 gcc_unreachable ();
16700 case FRAME_POINTER_REGNUM
:
16703 case THUMB_HARD_FRAME_POINTER_REGNUM
:
16706 case ARM_HARD_FRAME_POINTER_REGNUM
:
16707 /* The hard frame pointer points to the top entry in the
16708 stack frame. The soft frame pointer to the bottom entry
16709 in the stack frame. If there is no stack frame at all,
16710 then they are identical. */
16712 return offsets
->frame
- offsets
->soft_frame
;
16714 case STACK_POINTER_REGNUM
:
16715 return offsets
->outgoing_args
- offsets
->soft_frame
;
16718 gcc_unreachable ();
16720 gcc_unreachable ();
16723 /* You cannot eliminate from the stack pointer.
16724 In theory you could eliminate from the hard frame
16725 pointer to the stack pointer, but this will never
16726 happen, since if a stack frame is not needed the
16727 hard frame pointer will never be used. */
16728 gcc_unreachable ();
16732 /* Given FROM and TO register numbers, say whether this elimination is
16733 allowed. Frame pointer elimination is automatically handled.
16735 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16736 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16737 pointer, we must eliminate FRAME_POINTER_REGNUM into
16738 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16739 ARG_POINTER_REGNUM. */
16742 arm_can_eliminate (const int from
, const int to
)
16744 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
16745 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
16746 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
16747 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
16751 /* Emit RTL to save coprocessor registers on function entry. Returns the
16752 number of bytes pushed. */
16755 arm_save_coproc_regs(void)
16757 int saved_size
= 0;
16759 unsigned start_reg
;
16762 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
16763 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
16765 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
16766 insn
= gen_rtx_MEM (V2SImode
, insn
);
16767 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
16768 RTX_FRAME_RELATED_P (insn
) = 1;
16772 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
16774 start_reg
= FIRST_VFP_REGNUM
;
16776 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
16778 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
16779 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
16781 if (start_reg
!= reg
)
16782 saved_size
+= vfp_emit_fstmd (start_reg
,
16783 (reg
- start_reg
) / 2);
16784 start_reg
= reg
+ 2;
16787 if (start_reg
!= reg
)
16788 saved_size
+= vfp_emit_fstmd (start_reg
,
16789 (reg
- start_reg
) / 2);
16795 /* Set the Thumb frame pointer from the stack pointer. */
16798 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
16800 HOST_WIDE_INT amount
;
16803 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
16805 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16806 stack_pointer_rtx
, GEN_INT (amount
)));
16809 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
16810 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16811 expects the first two operands to be the same. */
16814 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16816 hard_frame_pointer_rtx
));
16820 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16821 hard_frame_pointer_rtx
,
16822 stack_pointer_rtx
));
16824 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
16825 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
16826 RTX_FRAME_RELATED_P (dwarf
) = 1;
16827 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
16830 RTX_FRAME_RELATED_P (insn
) = 1;
16833 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16836 arm_expand_prologue (void)
16841 unsigned long live_regs_mask
;
16842 unsigned long func_type
;
16844 int saved_pretend_args
= 0;
16845 int saved_regs
= 0;
16846 unsigned HOST_WIDE_INT args_to_push
;
16847 arm_stack_offsets
*offsets
;
16849 func_type
= arm_current_func_type ();
16851 /* Naked functions don't have prologues. */
16852 if (IS_NAKED (func_type
))
16855 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16856 args_to_push
= crtl
->args
.pretend_args_size
;
16858 /* Compute which register we will have to save onto the stack. */
16859 offsets
= arm_get_frame_offsets ();
16860 live_regs_mask
= offsets
->saved_regs_mask
;
16862 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
16864 if (IS_STACKALIGN (func_type
))
16868 /* Handle a word-aligned stack pointer. We generate the following:
16873 <save and restore r0 in normal prologue/epilogue>
16877 The unwinder doesn't need to know about the stack realignment.
16878 Just tell it we saved SP in r0. */
16879 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
16881 r0
= gen_rtx_REG (SImode
, 0);
16882 r1
= gen_rtx_REG (SImode
, 1);
16884 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
16885 RTX_FRAME_RELATED_P (insn
) = 1;
16886 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
16888 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
16890 /* ??? The CFA changes here, which may cause GDB to conclude that it
16891 has entered a different function. That said, the unwind info is
16892 correct, individually, before and after this instruction because
16893 we've described the save of SP, which will override the default
16894 handling of SP as restoring from the CFA. */
16895 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
16898 /* For APCS frames, if IP register is clobbered
16899 when creating frame, save that register in a special
16901 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
16903 if (IS_INTERRUPT (func_type
))
16905 /* Interrupt functions must not corrupt any registers.
16906 Creating a frame pointer however, corrupts the IP
16907 register, so we must push it first. */
16908 emit_multi_reg_push (1 << IP_REGNUM
);
16910 /* Do not set RTX_FRAME_RELATED_P on this insn.
16911 The dwarf stack unwinding code only wants to see one
16912 stack decrement per function, and this is not it. If
16913 this instruction is labeled as being part of the frame
16914 creation sequence then dwarf2out_frame_debug_expr will
16915 die when it encounters the assignment of IP to FP
16916 later on, since the use of SP here establishes SP as
16917 the CFA register and not IP.
16919 Anyway this instruction is not really part of the stack
16920 frame creation although it is part of the prologue. */
16922 else if (IS_NESTED (func_type
))
16924 /* The Static chain register is the same as the IP register
16925 used as a scratch register during stack frame creation.
16926 To get around this need to find somewhere to store IP
16927 whilst the frame is being created. We try the following
16930 1. The last argument register.
16931 2. A slot on the stack above the frame. (This only
16932 works if the function is not a varargs function).
16933 3. Register r3, after pushing the argument registers
16936 Note - we only need to tell the dwarf2 backend about the SP
16937 adjustment in the second variant; the static chain register
16938 doesn't need to be unwound, as it doesn't contain a value
16939 inherited from the caller. */
16941 if (df_regs_ever_live_p (3) == false)
16942 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
16943 else if (args_to_push
== 0)
16947 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16950 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
16951 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
16954 /* Just tell the dwarf backend that we adjusted SP. */
16955 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16956 plus_constant (Pmode
, stack_pointer_rtx
,
16958 RTX_FRAME_RELATED_P (insn
) = 1;
16959 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
16963 /* Store the args on the stack. */
16964 if (cfun
->machine
->uses_anonymous_args
)
16965 insn
= emit_multi_reg_push
16966 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
16969 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16970 GEN_INT (- args_to_push
)));
16972 RTX_FRAME_RELATED_P (insn
) = 1;
16974 saved_pretend_args
= 1;
16975 fp_offset
= args_to_push
;
16978 /* Now reuse r3 to preserve IP. */
16979 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
16983 insn
= emit_set_insn (ip_rtx
,
16984 plus_constant (Pmode
, stack_pointer_rtx
,
16986 RTX_FRAME_RELATED_P (insn
) = 1;
16991 /* Push the argument registers, or reserve space for them. */
16992 if (cfun
->machine
->uses_anonymous_args
)
16993 insn
= emit_multi_reg_push
16994 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
16997 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16998 GEN_INT (- args_to_push
)));
16999 RTX_FRAME_RELATED_P (insn
) = 1;
17002 /* If this is an interrupt service routine, and the link register
17003 is going to be pushed, and we're not generating extra
17004 push of IP (needed when frame is needed and frame layout if apcs),
17005 subtracting four from LR now will mean that the function return
17006 can be done with a single instruction. */
17007 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
17008 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
17009 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
17012 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
17014 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
17017 if (live_regs_mask
)
17019 saved_regs
+= bit_count (live_regs_mask
) * 4;
17020 if (optimize_size
&& !frame_pointer_needed
17021 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
17023 /* If no coprocessor registers are being pushed and we don't have
17024 to worry about a frame pointer then push extra registers to
17025 create the stack frame. This is done is a way that does not
17026 alter the frame layout, so is independent of the epilogue. */
17030 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
17032 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
17033 if (frame
&& n
* 4 >= frame
)
17036 live_regs_mask
|= (1 << n
) - 1;
17037 saved_regs
+= frame
;
17041 if (current_tune
->prefer_ldrd_strd
17042 && !optimize_function_for_size_p (cfun
))
17046 thumb2_emit_strd_push (live_regs_mask
);
17050 insn
= emit_multi_reg_push (live_regs_mask
);
17051 RTX_FRAME_RELATED_P (insn
) = 1;
17056 insn
= emit_multi_reg_push (live_regs_mask
);
17057 RTX_FRAME_RELATED_P (insn
) = 1;
17061 if (! IS_VOLATILE (func_type
))
17062 saved_regs
+= arm_save_coproc_regs ();
17064 if (frame_pointer_needed
&& TARGET_ARM
)
17066 /* Create the new frame pointer. */
17067 if (TARGET_APCS_FRAME
)
17069 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
17070 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
17071 RTX_FRAME_RELATED_P (insn
) = 1;
17073 if (IS_NESTED (func_type
))
17075 /* Recover the static chain register. */
17076 if (!df_regs_ever_live_p (3)
17077 || saved_pretend_args
)
17078 insn
= gen_rtx_REG (SImode
, 3);
17079 else /* if (crtl->args.pretend_args_size == 0) */
17081 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
17082 insn
= gen_frame_mem (SImode
, insn
);
17084 emit_set_insn (ip_rtx
, insn
);
17085 /* Add a USE to stop propagate_one_insn() from barfing. */
17086 emit_insn (gen_force_register_use (ip_rtx
));
17091 insn
= GEN_INT (saved_regs
- 4);
17092 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
17093 stack_pointer_rtx
, insn
));
17094 RTX_FRAME_RELATED_P (insn
) = 1;
17098 if (flag_stack_usage_info
)
17099 current_function_static_stack_size
17100 = offsets
->outgoing_args
- offsets
->saved_args
;
17102 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
17104 /* This add can produce multiple insns for a large constant, so we
17105 need to get tricky. */
17106 rtx last
= get_last_insn ();
17108 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
17109 - offsets
->outgoing_args
);
17111 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
17115 last
= last
? NEXT_INSN (last
) : get_insns ();
17116 RTX_FRAME_RELATED_P (last
) = 1;
17118 while (last
!= insn
);
17120 /* If the frame pointer is needed, emit a special barrier that
17121 will prevent the scheduler from moving stores to the frame
17122 before the stack adjustment. */
17123 if (frame_pointer_needed
)
17124 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
17125 hard_frame_pointer_rtx
));
17129 if (frame_pointer_needed
&& TARGET_THUMB2
)
17130 thumb_set_frame_pointer (offsets
);
17132 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
17134 unsigned long mask
;
17136 mask
= live_regs_mask
;
17137 mask
&= THUMB2_WORK_REGS
;
17138 if (!IS_NESTED (func_type
))
17139 mask
|= (1 << IP_REGNUM
);
17140 arm_load_pic_register (mask
);
17143 /* If we are profiling, make sure no instructions are scheduled before
17144 the call to mcount. Similarly if the user has requested no
17145 scheduling in the prolog. Similarly if we want non-call exceptions
17146 using the EABI unwinder, to prevent faulting instructions from being
17147 swapped with a stack adjustment. */
17148 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
17149 || (arm_except_unwind_info (&global_options
) == UI_TARGET
17150 && cfun
->can_throw_non_call_exceptions
))
17151 emit_insn (gen_blockage ());
17153 /* If the link register is being kept alive, with the return address in it,
17154 then make sure that it does not get reused by the ce2 pass. */
17155 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
17156 cfun
->machine
->lr_save_eliminated
= 1;
17159 /* Print condition code to STREAM. Helper function for arm_print_operand. */
/* NOTE(review): this block is a lossy extraction.  The leading numbers
   (17159, 17161, ...) are original source line numbers fused into the
   text, and several physical lines (the return type, braces, and at
   least one guard condition) were dropped -- do not treat this text as
   compilable C.  Comments below are hedged; confirm against upstream
   arm.c before relying on them.  */
17161 arm_print_condition (FILE *stream
)
/* ccfsm states 3 and 4 mean we are inside a conditionalised ARM
   instruction sequence, so the current condition suffix is emitted.  */
17163 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
17165 /* Branch conversion is not implemented for Thumb-2. */
/* Presumably guarded by a (dropped) TARGET_THUMB2 check -- verify.  */
17168 output_operand_lossage ("predicated Thumb instruction");
/* An insn predicate inside a ccfsm sequence is an error: the two
   conditionalisation mechanisms cannot be combined.  */
17171 if (current_insn_predicate
!= NULL
)
17173 output_operand_lossage
17174 ("predicated instruction in conditional sequence");
/* Emit the mnemonic suffix for the ccfsm's current condition code.  */
17178 fputs (arm_condition_codes
[arm_current_cc
], stream
)
;
/* Otherwise, a COND_EXEC predicate on the insn supplies the code.  */
17180 else if (current_insn_predicate
)
17182 enum arm_cond_code code
;
/* Same Thumb restriction as above; guard line dropped by extraction.  */
17186 output_operand_lossage ("predicated Thumb instruction");
17190 code
= get_arm_condition_code (current_insn_predicate
);
17191 fputs (arm_condition_codes
[code
], stream
);
17196 /* If CODE is 'd', then the X is a condition operand and the instruction
17197 should only be executed if the condition is true.
17198 if CODE is 'D', then the X is a condition operand and the instruction
17199 should only be executed if the condition is false: however, if the mode
17200 of the comparison is CCFPEmode, then always execute the instruction -- we
17201 do this because in these circumstances !GE does not necessarily imply LT;
17202 in these cases the instruction pattern will take care to make sure that
17203 an instruction containing %d will follow, thereby undoing the effects of
17204 doing this instruction unconditionally.
17205 If CODE is 'N' then X is a floating point operand that must be negated
17207 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17208 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
17210 arm_print_operand (FILE *stream
, rtx x
, int code
)
17215 fputs (ASM_COMMENT_START
, stream
);
17219 fputs (user_label_prefix
, stream
);
17223 fputs (REGISTER_PREFIX
, stream
);
17227 arm_print_condition (stream
);
17231 /* Nothing in unified syntax, otherwise the current condition code. */
17232 if (!TARGET_UNIFIED_ASM
)
17233 arm_print_condition (stream
);
17237 /* The current condition code in unified syntax, otherwise nothing. */
17238 if (TARGET_UNIFIED_ASM
)
17239 arm_print_condition (stream
);
17243 /* The current condition code for a condition code setting instruction.
17244 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17245 if (TARGET_UNIFIED_ASM
)
17247 fputc('s', stream
);
17248 arm_print_condition (stream
);
17252 arm_print_condition (stream
);
17253 fputc('s', stream
);
17258 /* If the instruction is conditionally executed then print
17259 the current condition code, otherwise print 's'. */
17260 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
17261 if (current_insn_predicate
)
17262 arm_print_condition (stream
);
17264 fputc('s', stream
);
17267 /* %# is a "break" sequence. It doesn't output anything, but is used to
17268 separate e.g. operand numbers from following text, if that text consists
17269 of further digits which we don't want to be part of the operand
17277 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
17278 r
= real_value_negate (&r
);
17279 fprintf (stream
, "%s", fp_const_from_val (&r
));
17283 /* An integer or symbol address without a preceding # sign. */
17285 switch (GET_CODE (x
))
17288 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
17292 output_addr_const (stream
, x
);
17296 if (GET_CODE (XEXP (x
, 0)) == PLUS
17297 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
17299 output_addr_const (stream
, x
);
17302 /* Fall through. */
17305 output_operand_lossage ("Unsupported operand for code '%c'", code
);
17309 /* An integer that we want to print in HEX. */
17311 switch (GET_CODE (x
))
17314 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
17318 output_operand_lossage ("Unsupported operand for code '%c'", code
);
17323 if (CONST_INT_P (x
))
17326 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
17327 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
17331 putc ('~', stream
);
17332 output_addr_const (stream
, x
);
17337 /* The low 16 bits of an immediate constant. */
17338 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
17342 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
17346 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
17354 if (!shift_operator (x
, SImode
))
17356 output_operand_lossage ("invalid shift operand");
17360 shift
= shift_op (x
, &val
);
17364 fprintf (stream
, ", %s ", shift
);
17366 arm_print_operand (stream
, XEXP (x
, 1), 0);
17368 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
17373 /* An explanation of the 'Q', 'R' and 'H' register operands:
17375 In a pair of registers containing a DI or DF value the 'Q'
17376 operand returns the register number of the register containing
17377 the least significant part of the value. The 'R' operand returns
17378 the register number of the register containing the most
17379 significant part of the value.
17381 The 'H' operand returns the higher of the two register numbers.
17382 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17383 same as the 'Q' operand, since the most significant part of the
17384 value is held in the lower number register. The reverse is true
17385 on systems where WORDS_BIG_ENDIAN is false.
17387 The purpose of these operands is to distinguish between cases
17388 where the endian-ness of the values is important (for example
17389 when they are added together), and cases where the endian-ness
17390 is irrelevant, but the order of register operations is important.
17391 For example when loading a value from memory into a register
17392 pair, the endian-ness does not matter. Provided that the value
17393 from the lower memory address is put into the lower numbered
17394 register, and the value from the higher address is put into the
17395 higher numbered register, the load will work regardless of whether
17396 the value being loaded is big-wordian or little-wordian. The
17397 order of the two register loads can matter however, if the address
17398 of the memory location is actually held in one of the registers
17399 being overwritten by the load.
17401 The 'Q' and 'R' constraints are also available for 64-bit
17404 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
17406 rtx part
= gen_lowpart (SImode
, x
);
17407 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
17411 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17413 output_operand_lossage ("invalid operand for code '%c'", code
);
17417 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
17421 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
17423 enum machine_mode mode
= GET_MODE (x
);
17426 if (mode
== VOIDmode
)
17428 part
= gen_highpart_mode (SImode
, mode
, x
);
17429 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
17433 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17435 output_operand_lossage ("invalid operand for code '%c'", code
);
17439 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
17443 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17445 output_operand_lossage ("invalid operand for code '%c'", code
);
17449 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
17453 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17455 output_operand_lossage ("invalid operand for code '%c'", code
);
17459 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
17463 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17465 output_operand_lossage ("invalid operand for code '%c'", code
);
17469 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
17473 asm_fprintf (stream
, "%r",
17474 REG_P (XEXP (x
, 0))
17475 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
17479 asm_fprintf (stream
, "{%r-%r}",
17481 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
17484 /* Like 'M', but writing doubleword vector registers, for use by Neon
17488 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
17489 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
17491 asm_fprintf (stream
, "{d%d}", regno
);
17493 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
17498 /* CONST_TRUE_RTX means always -- that's the default. */
17499 if (x
== const_true_rtx
)
17502 if (!COMPARISON_P (x
))
17504 output_operand_lossage ("invalid operand for code '%c'", code
);
17508 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
17513 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17514 want to do that. */
17515 if (x
== const_true_rtx
)
17517 output_operand_lossage ("instruction never executed");
17520 if (!COMPARISON_P (x
))
17522 output_operand_lossage ("invalid operand for code '%c'", code
);
17526 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
17527 (get_arm_condition_code (x
))],
17537 /* Former Maverick support, removed after GCC-4.7. */
17538 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
17543 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
17544 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
17545 /* Bad value for wCG register number. */
17547 output_operand_lossage ("invalid operand for code '%c'", code
);
17552 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
17555 /* Print an iWMMXt control register name. */
17557 if (!CONST_INT_P (x
)
17559 || INTVAL (x
) >= 16)
17560 /* Bad value for wC register number. */
17562 output_operand_lossage ("invalid operand for code '%c'", code
);
17568 static const char * wc_reg_names
[16] =
17570 "wCID", "wCon", "wCSSF", "wCASF",
17571 "wC4", "wC5", "wC6", "wC7",
17572 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17573 "wC12", "wC13", "wC14", "wC15"
17576 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
17580 /* Print the high single-precision register of a VFP double-precision
17584 int mode
= GET_MODE (x
);
17587 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
17589 output_operand_lossage ("invalid operand for code '%c'", code
);
17594 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
17596 output_operand_lossage ("invalid operand for code '%c'", code
);
17600 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
17604 /* Print a VFP/Neon double precision or quad precision register name. */
17608 int mode
= GET_MODE (x
);
17609 int is_quad
= (code
== 'q');
17612 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
17614 output_operand_lossage ("invalid operand for code '%c'", code
);
17619 || !IS_VFP_REGNUM (REGNO (x
)))
17621 output_operand_lossage ("invalid operand for code '%c'", code
);
17626 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
17627 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
17629 output_operand_lossage ("invalid operand for code '%c'", code
);
17633 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
17634 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
17638 /* These two codes print the low/high doubleword register of a Neon quad
17639 register, respectively. For pair-structure types, can also print
17640 low/high quadword registers. */
17644 int mode
= GET_MODE (x
);
17647 if ((GET_MODE_SIZE (mode
) != 16
17648 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
17650 output_operand_lossage ("invalid operand for code '%c'", code
);
17655 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
17657 output_operand_lossage ("invalid operand for code '%c'", code
);
17661 if (GET_MODE_SIZE (mode
) == 16)
17662 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
17663 + (code
== 'f' ? 1 : 0));
17665 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
17666 + (code
== 'f' ? 1 : 0));
17670 /* Print a VFPv3 floating-point constant, represented as an integer
17674 int index
= vfp3_const_double_index (x
);
17675 gcc_assert (index
!= -1);
17676 fprintf (stream
, "%d", index
);
17680 /* Print bits representing opcode features for Neon.
17682 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17683 and polynomials as unsigned.
17685 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17687 Bit 2 is 1 for rounding functions, 0 otherwise. */
17689 /* Identify the type as 's', 'u', 'p' or 'f'. */
17692 HOST_WIDE_INT bits
= INTVAL (x
);
17693 fputc ("uspf"[bits
& 3], stream
);
17697 /* Likewise, but signed and unsigned integers are both 'i'. */
17700 HOST_WIDE_INT bits
= INTVAL (x
);
17701 fputc ("iipf"[bits
& 3], stream
);
17705 /* As for 'T', but emit 'u' instead of 'p'. */
17708 HOST_WIDE_INT bits
= INTVAL (x
);
17709 fputc ("usuf"[bits
& 3], stream
);
17713 /* Bit 2: rounding (vs none). */
17716 HOST_WIDE_INT bits
= INTVAL (x
);
17717 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
17721 /* Memory operand for vld1/vst1 instruction. */
17725 bool postinc
= FALSE
;
17726 unsigned align
, memsize
, align_bits
;
17728 gcc_assert (MEM_P (x
));
17729 addr
= XEXP (x
, 0);
17730 if (GET_CODE (addr
) == POST_INC
)
17733 addr
= XEXP (addr
, 0);
17735 asm_fprintf (stream
, "[%r", REGNO (addr
));
17737 /* We know the alignment of this access, so we can emit a hint in the
17738 instruction (for some alignments) as an aid to the memory subsystem
17740 align
= MEM_ALIGN (x
) >> 3;
17741 memsize
= MEM_SIZE (x
);
17743 /* Only certain alignment specifiers are supported by the hardware. */
17744 if (memsize
== 32 && (align
% 32) == 0)
17746 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
17748 else if (memsize
>= 8 && (align
% 8) == 0)
17753 if (align_bits
!= 0)
17754 asm_fprintf (stream
, ":%d", align_bits
);
17756 asm_fprintf (stream
, "]");
17759 fputs("!", stream
);
17767 gcc_assert (MEM_P (x
));
17768 addr
= XEXP (x
, 0);
17769 gcc_assert (REG_P (addr
));
17770 asm_fprintf (stream
, "[%r]", REGNO (addr
));
17774 /* Translate an S register number into a D register number and element index. */
17777 int mode
= GET_MODE (x
);
17780 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
17782 output_operand_lossage ("invalid operand for code '%c'", code
);
17787 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
17789 output_operand_lossage ("invalid operand for code '%c'", code
);
17793 regno
= regno
- FIRST_VFP_REGNUM
;
17794 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
17799 gcc_assert (CONST_DOUBLE_P (x
));
17800 fprintf (stream
, "#%d", vfp3_const_double_for_fract_bits (x
));
17803 /* Register specifier for vld1.16/vst1.16. Translate the S register
17804 number into a D register number and element index. */
17807 int mode
= GET_MODE (x
);
17810 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
17812 output_operand_lossage ("invalid operand for code '%c'", code
);
17817 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
17819 output_operand_lossage ("invalid operand for code '%c'", code
);
17823 regno
= regno
- FIRST_VFP_REGNUM
;
17824 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
17831 output_operand_lossage ("missing operand");
17835 switch (GET_CODE (x
))
17838 asm_fprintf (stream
, "%r", REGNO (x
));
17842 output_memory_reference_mode
= GET_MODE (x
);
17843 output_address (XEXP (x
, 0));
17850 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
17851 sizeof (fpstr
), 0, 1);
17852 fprintf (stream
, "#%s", fpstr
);
17855 fprintf (stream
, "#%s", fp_immediate_constant (x
));
17859 gcc_assert (GET_CODE (x
) != NEG
);
17860 fputc ('#', stream
);
17861 if (GET_CODE (x
) == HIGH
)
17863 fputs (":lower16:", stream
);
17867 output_addr_const (stream
, x
);
17873 /* Target hook for printing a memory address. */
17875 arm_print_operand_address (FILE *stream
, rtx x
)
17879 int is_minus
= GET_CODE (x
) == MINUS
;
17882 asm_fprintf (stream
, "[%r]", REGNO (x
));
17883 else if (GET_CODE (x
) == PLUS
|| is_minus
)
17885 rtx base
= XEXP (x
, 0);
17886 rtx index
= XEXP (x
, 1);
17887 HOST_WIDE_INT offset
= 0;
17889 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
17891 /* Ensure that BASE is a register. */
17892 /* (one of them must be). */
17893 /* Also ensure the SP is not used as in index register. */
17898 switch (GET_CODE (index
))
17901 offset
= INTVAL (index
);
17904 asm_fprintf (stream
, "[%r, #%wd]",
17905 REGNO (base
), offset
);
17909 asm_fprintf (stream
, "[%r, %s%r]",
17910 REGNO (base
), is_minus
? "-" : "",
17920 asm_fprintf (stream
, "[%r, %s%r",
17921 REGNO (base
), is_minus
? "-" : "",
17922 REGNO (XEXP (index
, 0)));
17923 arm_print_operand (stream
, index
, 'S');
17924 fputs ("]", stream
);
17929 gcc_unreachable ();
17932 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
17933 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
17935 extern enum machine_mode output_memory_reference_mode
;
17937 gcc_assert (REG_P (XEXP (x
, 0)));
17939 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
17940 asm_fprintf (stream
, "[%r, #%s%d]!",
17941 REGNO (XEXP (x
, 0)),
17942 GET_CODE (x
) == PRE_DEC
? "-" : "",
17943 GET_MODE_SIZE (output_memory_reference_mode
));
17945 asm_fprintf (stream
, "[%r], #%s%d",
17946 REGNO (XEXP (x
, 0)),
17947 GET_CODE (x
) == POST_DEC
? "-" : "",
17948 GET_MODE_SIZE (output_memory_reference_mode
));
17950 else if (GET_CODE (x
) == PRE_MODIFY
)
17952 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
17953 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
17954 asm_fprintf (stream
, "#%wd]!",
17955 INTVAL (XEXP (XEXP (x
, 1), 1)));
17957 asm_fprintf (stream
, "%r]!",
17958 REGNO (XEXP (XEXP (x
, 1), 1)));
17960 else if (GET_CODE (x
) == POST_MODIFY
)
17962 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
17963 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
17964 asm_fprintf (stream
, "#%wd",
17965 INTVAL (XEXP (XEXP (x
, 1), 1)));
17967 asm_fprintf (stream
, "%r",
17968 REGNO (XEXP (XEXP (x
, 1), 1)));
17970 else output_addr_const (stream
, x
);
17975 asm_fprintf (stream
, "[%r]", REGNO (x
));
17976 else if (GET_CODE (x
) == POST_INC
)
17977 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
17978 else if (GET_CODE (x
) == PLUS
)
17980 gcc_assert (REG_P (XEXP (x
, 0)));
17981 if (CONST_INT_P (XEXP (x
, 1)))
17982 asm_fprintf (stream
, "[%r, #%wd]",
17983 REGNO (XEXP (x
, 0)),
17984 INTVAL (XEXP (x
, 1)));
17986 asm_fprintf (stream
, "[%r, %r]",
17987 REGNO (XEXP (x
, 0)),
17988 REGNO (XEXP (x
, 1)));
17991 output_addr_const (stream
, x
);
17995 /* Target hook for indicating whether a punctuation character for
17996 TARGET_PRINT_OPERAND is valid. */
17998 arm_print_operand_punct_valid_p (unsigned char code
)
18000 return (code
== '@' || code
== '|' || code
== '.'
18001 || code
== '(' || code
== ')' || code
== '#'
18002 || (TARGET_32BIT
&& (code
== '?'))
18003 || (TARGET_THUMB2
&& (code
== '!'))
18004 || (TARGET_THUMB
&& (code
== '_')));
18007 /* Target hook for assembling integer objects. The ARM version needs to
18008 handle word-sized values specially. */
18010 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
18012 enum machine_mode mode
;
18014 if (size
== UNITS_PER_WORD
&& aligned_p
)
18016 fputs ("\t.word\t", asm_out_file
);
18017 output_addr_const (asm_out_file
, x
);
18019 /* Mark symbols as position independent. We only do this in the
18020 .text segment, not in the .data segment. */
18021 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
18022 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
18024 /* See legitimize_pic_address for an explanation of the
18025 TARGET_VXWORKS_RTP check. */
18026 if (TARGET_VXWORKS_RTP
18027 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
18028 fputs ("(GOT)", asm_out_file
);
18030 fputs ("(GOTOFF)", asm_out_file
);
18032 fputc ('\n', asm_out_file
);
18036 mode
= GET_MODE (x
);
18038 if (arm_vector_mode_supported_p (mode
))
18042 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
18044 units
= CONST_VECTOR_NUNITS (x
);
18045 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
18047 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
18048 for (i
= 0; i
< units
; i
++)
18050 rtx elt
= CONST_VECTOR_ELT (x
, i
);
18052 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
18055 for (i
= 0; i
< units
; i
++)
18057 rtx elt
= CONST_VECTOR_ELT (x
, i
);
18058 REAL_VALUE_TYPE rval
;
18060 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
18063 (rval
, GET_MODE_INNER (mode
),
18064 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
18070 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit SYMBOL as a static constructor (IS_CTOR true) or destructor
   (IS_CTOR false) entry with the given PRIORITY.
   NOTE(review): lossy extraction -- the leading 180xx numbers are fused
   original line numbers, and several physical lines (return type,
   braces, the buffer declaration, the priority argument of sprintf,
   section flags) were dropped.  Comments below are hedged.  */
18074 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
/* Non-AAPCS targets fall back to the generic named-section handlers.  */
18078 if (!TARGET_AAPCS_BASED
)
18081 default_named_section_asm_out_constructor
18082 : default_named_section_asm_out_destructor
) (symbol
, priority
);
18086 /* Put these in the .init_array section, using a special relocation. */
/* Non-default priorities get a numbered section name such as
   ".init_array.00042" -- presumably linker-sorted; confirm the exact
   sprintf argument, which was dropped by the extraction.  */
18087 if (priority
!= DEFAULT_INIT_PRIORITY
)
18090 sprintf (buf
, "%s.%.5u",
18091 is_ctor
? ".init_array" : ".fini_array",
18093 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
/* Emit a pointer-aligned word holding SYMBOL with the (target1)
   relocation, which the ARM EABI defines for init/fini entries.  */
18100 switch_to_section (s
);
18101 assemble_align (POINTER_SIZE
);
18102 fputs ("\t.word\t", asm_out_file
);
18103 output_addr_const (asm_out_file
, symbol
);
18104 fputs ("(target1)\n", asm_out_file
);
18107 /* Add a function to the list of static constructors. */
18110 arm_elf_asm_constructor (rtx symbol
, int priority
)
18112 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
18115 /* Add a function to the list of static destructors. */
18118 arm_elf_asm_destructor (rtx symbol
, int priority
)
18120 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
18123 /* A finite state machine takes care of noticing whether or not instructions
18124 can be conditionally executed, and thus decrease execution time and code
18125 size by deleting branch instructions. The fsm is controlled by
18126 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18128 /* The state of the fsm controlling condition codes are:
18129 0: normal, do nothing special
18130 1: make ASM_OUTPUT_OPCODE not output this instruction
18131 2: make ASM_OUTPUT_OPCODE not output this instruction
18132 3: make instructions conditional
18133 4: make instructions conditional
18135 State transitions (state->state by whom under condition):
18136 0 -> 1 final_prescan_insn if the `target' is a label
18137 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18138 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18139 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18140 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18141 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18142 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18143 (the target insn is arm_target_insn).
18145 If the jump clobbers the conditions then we use states 2 and 4.
18147 A similar thing can be done with conditional return insns.
18149 XXX In case the `target' is an unconditional branch, this conditionalising
18150 of the instructions always reduces code size, but not always execution
18151 time. But then, I want to reduce the code size to somewhere near what
18152 /bin/cc produces. */
18154 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18155 instructions. When a COND_EXEC instruction is seen the subsequent
18156 instructions are scanned so that multiple conditional instructions can be
18157 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18158 specify the length and true/false mask for the IT block. These will be
18159 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
18161 /* Returns the index of the ARM condition code string in
18162 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18163 COMPARISON should be an rtx like `(eq (...) (...))'. */
18166 maybe_get_arm_condition_code (rtx comparison
)
18168 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
18169 enum arm_cond_code code
;
18170 enum rtx_code comp_code
= GET_CODE (comparison
);
18172 if (GET_MODE_CLASS (mode
) != MODE_CC
)
18173 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
18174 XEXP (comparison
, 1));
18178 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
18179 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
18180 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
18181 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
18182 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
18183 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
18184 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
18185 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
18186 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
18187 case CC_DLTUmode
: code
= ARM_CC
;
18190 if (comp_code
== EQ
)
18191 return ARM_INVERSE_CONDITION_CODE (code
);
18192 if (comp_code
== NE
)
18199 case NE
: return ARM_NE
;
18200 case EQ
: return ARM_EQ
;
18201 case GE
: return ARM_PL
;
18202 case LT
: return ARM_MI
;
18203 default: return ARM_NV
;
18209 case NE
: return ARM_NE
;
18210 case EQ
: return ARM_EQ
;
18211 default: return ARM_NV
;
18217 case NE
: return ARM_MI
;
18218 case EQ
: return ARM_PL
;
18219 default: return ARM_NV
;
18224 /* We can handle all cases except UNEQ and LTGT. */
18227 case GE
: return ARM_GE
;
18228 case GT
: return ARM_GT
;
18229 case LE
: return ARM_LS
;
18230 case LT
: return ARM_MI
;
18231 case NE
: return ARM_NE
;
18232 case EQ
: return ARM_EQ
;
18233 case ORDERED
: return ARM_VC
;
18234 case UNORDERED
: return ARM_VS
;
18235 case UNLT
: return ARM_LT
;
18236 case UNLE
: return ARM_LE
;
18237 case UNGT
: return ARM_HI
;
18238 case UNGE
: return ARM_PL
;
18239 /* UNEQ and LTGT do not have a representation. */
18240 case UNEQ
: /* Fall through. */
18241 case LTGT
: /* Fall through. */
18242 default: return ARM_NV
;
18248 case NE
: return ARM_NE
;
18249 case EQ
: return ARM_EQ
;
18250 case GE
: return ARM_LE
;
18251 case GT
: return ARM_LT
;
18252 case LE
: return ARM_GE
;
18253 case LT
: return ARM_GT
;
18254 case GEU
: return ARM_LS
;
18255 case GTU
: return ARM_CC
;
18256 case LEU
: return ARM_CS
;
18257 case LTU
: return ARM_HI
;
18258 default: return ARM_NV
;
18264 case LTU
: return ARM_CS
;
18265 case GEU
: return ARM_CC
;
18266 default: return ARM_NV
;
18272 case NE
: return ARM_NE
;
18273 case EQ
: return ARM_EQ
;
18274 case GEU
: return ARM_CS
;
18275 case GTU
: return ARM_HI
;
18276 case LEU
: return ARM_LS
;
18277 case LTU
: return ARM_CC
;
18278 default: return ARM_NV
;
18284 case GE
: return ARM_GE
;
18285 case LT
: return ARM_LT
;
18286 case GEU
: return ARM_CS
;
18287 case LTU
: return ARM_CC
;
18288 default: return ARM_NV
;
18294 case NE
: return ARM_NE
;
18295 case EQ
: return ARM_EQ
;
18296 case GE
: return ARM_GE
;
18297 case GT
: return ARM_GT
;
18298 case LE
: return ARM_LE
;
18299 case LT
: return ARM_LT
;
18300 case GEU
: return ARM_CS
;
18301 case GTU
: return ARM_HI
;
18302 case LEU
: return ARM_LS
;
18303 case LTU
: return ARM_CC
;
18304 default: return ARM_NV
;
18307 default: gcc_unreachable ();
18311 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18312 static enum arm_cond_code
18313 get_arm_condition_code (rtx comparison
)
18315 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
18316 gcc_assert (code
!= ARM_NV
);
18320 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18323 thumb2_final_prescan_insn (rtx insn
)
18325 rtx first_insn
= insn
;
18326 rtx body
= PATTERN (insn
);
18328 enum arm_cond_code code
;
18332 /* Remove the previous insn from the count of insns to be output. */
18333 if (arm_condexec_count
)
18334 arm_condexec_count
--;
18336 /* Nothing to do if we are already inside a conditional block. */
18337 if (arm_condexec_count
)
18340 if (GET_CODE (body
) != COND_EXEC
)
18343 /* Conditional jumps are implemented directly. */
18347 predicate
= COND_EXEC_TEST (body
);
18348 arm_current_cc
= get_arm_condition_code (predicate
);
18350 n
= get_attr_ce_count (insn
);
18351 arm_condexec_count
= 1;
18352 arm_condexec_mask
= (1 << n
) - 1;
18353 arm_condexec_masklen
= n
;
18354 /* See if subsequent instructions can be combined into the same block. */
18357 insn
= next_nonnote_insn (insn
);
18359 /* Jumping into the middle of an IT block is illegal, so a label or
18360 barrier terminates the block. */
18361 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
18364 body
= PATTERN (insn
);
18365 /* USE and CLOBBER aren't really insns, so just skip them. */
18366 if (GET_CODE (body
) == USE
18367 || GET_CODE (body
) == CLOBBER
)
18370 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18371 if (GET_CODE (body
) != COND_EXEC
)
18373 /* Allow up to 4 conditionally executed instructions in a block. */
18374 n
= get_attr_ce_count (insn
);
18375 if (arm_condexec_masklen
+ n
> 4)
18378 predicate
= COND_EXEC_TEST (body
);
18379 code
= get_arm_condition_code (predicate
);
18380 mask
= (1 << n
) - 1;
18381 if (arm_current_cc
== code
)
18382 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
18383 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
18386 arm_condexec_count
++;
18387 arm_condexec_masklen
+= n
;
18389 /* A jump must be the last instruction in a conditional block. */
18393 /* Restore recog_data (getting the attributes of other insns can
18394 destroy this array, but final.c assumes that it remains intact
18395 across this call). */
18396 extract_constrain_insn_cached (first_insn
);
18400 arm_final_prescan_insn (rtx insn
)
18402 /* BODY will hold the body of INSN. */
18403 rtx body
= PATTERN (insn
);
18405 /* This will be 1 if trying to repeat the trick, and things need to be
18406 reversed if it appears to fail. */
18409 /* If we start with a return insn, we only succeed if we find another one. */
18410 int seeking_return
= 0;
18411 enum rtx_code return_code
= UNKNOWN
;
18413 /* START_INSN will hold the insn from where we start looking. This is the
18414 first insn after the following code_label if REVERSE is true. */
18415 rtx start_insn
= insn
;
18417 /* If in state 4, check if the target branch is reached, in order to
18418 change back to state 0. */
18419 if (arm_ccfsm_state
== 4)
18421 if (insn
== arm_target_insn
)
18423 arm_target_insn
= NULL
;
18424 arm_ccfsm_state
= 0;
18429 /* If in state 3, it is possible to repeat the trick, if this insn is an
18430 unconditional branch to a label, and immediately following this branch
18431 is the previous target label which is only used once, and the label this
18432 branch jumps to is not too far off. */
18433 if (arm_ccfsm_state
== 3)
18435 if (simplejump_p (insn
))
18437 start_insn
= next_nonnote_insn (start_insn
);
18438 if (BARRIER_P (start_insn
))
18440 /* XXX Isn't this always a barrier? */
18441 start_insn
= next_nonnote_insn (start_insn
);
18443 if (LABEL_P (start_insn
)
18444 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
18445 && LABEL_NUSES (start_insn
) == 1)
18450 else if (ANY_RETURN_P (body
))
18452 start_insn
= next_nonnote_insn (start_insn
);
18453 if (BARRIER_P (start_insn
))
18454 start_insn
= next_nonnote_insn (start_insn
);
18455 if (LABEL_P (start_insn
)
18456 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
18457 && LABEL_NUSES (start_insn
) == 1)
18460 seeking_return
= 1;
18461 return_code
= GET_CODE (body
);
18470 gcc_assert (!arm_ccfsm_state
|| reverse
);
18471 if (!JUMP_P (insn
))
18474 /* This jump might be paralleled with a clobber of the condition codes
18475 the jump should always come first */
18476 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
18477 body
= XVECEXP (body
, 0, 0);
18480 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
18481 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
18484 int fail
= FALSE
, succeed
= FALSE
;
18485 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18486 int then_not_else
= TRUE
;
18487 rtx this_insn
= start_insn
, label
= 0;
18489 /* Register the insn jumped to. */
18492 if (!seeking_return
)
18493 label
= XEXP (SET_SRC (body
), 0);
18495 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
18496 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
18497 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
18499 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
18500 then_not_else
= FALSE
;
18502 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
18504 seeking_return
= 1;
18505 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
18507 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
18509 seeking_return
= 1;
18510 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
18511 then_not_else
= FALSE
;
18514 gcc_unreachable ();
18516 /* See how many insns this branch skips, and what kind of insns. If all
18517 insns are okay, and the label or unconditional branch to the same
18518 label is not too far away, succeed. */
18519 for (insns_skipped
= 0;
18520 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
18524 this_insn
= next_nonnote_insn (this_insn
);
18528 switch (GET_CODE (this_insn
))
18531 /* Succeed if it is the target label, otherwise fail since
18532 control falls in from somewhere else. */
18533 if (this_insn
== label
)
18535 arm_ccfsm_state
= 1;
18543 /* Succeed if the following insn is the target label.
18545 If return insns are used then the last insn in a function
18546 will be a barrier. */
18547 this_insn
= next_nonnote_insn (this_insn
);
18548 if (this_insn
&& this_insn
== label
)
18550 arm_ccfsm_state
= 1;
18558 /* The AAPCS says that conditional calls should not be
18559 used since they make interworking inefficient (the
18560 linker can't transform BL<cond> into BLX). That's
18561 only a problem if the machine has BLX. */
18568 /* Succeed if the following insn is the target label, or
18569 if the following two insns are a barrier and the
18571 this_insn
= next_nonnote_insn (this_insn
);
18572 if (this_insn
&& BARRIER_P (this_insn
))
18573 this_insn
= next_nonnote_insn (this_insn
);
18575 if (this_insn
&& this_insn
== label
18576 && insns_skipped
< max_insns_skipped
)
18578 arm_ccfsm_state
= 1;
18586 /* If this is an unconditional branch to the same label, succeed.
18587 If it is to another label, do nothing. If it is conditional,
18589 /* XXX Probably, the tests for SET and the PC are
18592 scanbody
= PATTERN (this_insn
);
18593 if (GET_CODE (scanbody
) == SET
18594 && GET_CODE (SET_DEST (scanbody
)) == PC
)
18596 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
18597 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
18599 arm_ccfsm_state
= 2;
18602 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
18605 /* Fail if a conditional return is undesirable (e.g. on a
18606 StrongARM), but still allow this if optimizing for size. */
18607 else if (GET_CODE (scanbody
) == return_code
18608 && !use_return_insn (TRUE
, NULL
)
18611 else if (GET_CODE (scanbody
) == return_code
)
18613 arm_ccfsm_state
= 2;
18616 else if (GET_CODE (scanbody
) == PARALLEL
)
18618 switch (get_attr_conds (this_insn
))
18628 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
18633 /* Instructions using or affecting the condition codes make it
18635 scanbody
= PATTERN (this_insn
);
18636 if (!(GET_CODE (scanbody
) == SET
18637 || GET_CODE (scanbody
) == PARALLEL
)
18638 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
18648 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
18649 arm_target_label
= CODE_LABEL_NUMBER (label
);
18652 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
18654 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
18656 this_insn
= next_nonnote_insn (this_insn
);
18657 gcc_assert (!this_insn
18658 || (!BARRIER_P (this_insn
)
18659 && !LABEL_P (this_insn
)));
18663 /* Oh, dear! we ran off the end.. give up. */
18664 extract_constrain_insn_cached (insn
);
18665 arm_ccfsm_state
= 0;
18666 arm_target_insn
= NULL
;
18669 arm_target_insn
= this_insn
;
18672 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18675 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
18677 if (reverse
|| then_not_else
)
18678 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
18681 /* Restore recog_data (getting the attributes of other insns can
18682 destroy this array, but final.c assumes that it remains intact
18683 across this call. */
18684 extract_constrain_insn_cached (insn
);
18688 /* Output IT instructions. */
18690 thumb2_asm_output_opcode (FILE * stream
)
18695 if (arm_condexec_mask
)
18697 for (n
= 0; n
< arm_condexec_masklen
; n
++)
18698 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
18700 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
18701 arm_condition_codes
[arm_current_cc
]);
18702 arm_condexec_mask
= 0;
18706 /* Returns true if REGNO is a valid register
18707 for holding a quantity of type MODE. */
18709 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
18711 if (GET_MODE_CLASS (mode
) == MODE_CC
)
18712 return (regno
== CC_REGNUM
18713 || (TARGET_HARD_FLOAT
&& TARGET_VFP
18714 && regno
== VFPCC_REGNUM
));
18717 /* For the Thumb we only allow values bigger than SImode in
18718 registers 0 - 6, so that there is always a second low
18719 register available to hold the upper part of the value.
18720 We probably we ought to ensure that the register is the
18721 start of an even numbered register pair. */
18722 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
18724 if (TARGET_HARD_FLOAT
&& TARGET_VFP
18725 && IS_VFP_REGNUM (regno
))
18727 if (mode
== SFmode
|| mode
== SImode
)
18728 return VFP_REGNO_OK_FOR_SINGLE (regno
);
18730 if (mode
== DFmode
)
18731 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
18733 /* VFP registers can hold HFmode values, but there is no point in
18734 putting them there unless we have hardware conversion insns. */
18735 if (mode
== HFmode
)
18736 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
18739 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
18740 || (VALID_NEON_QREG_MODE (mode
)
18741 && NEON_REGNO_OK_FOR_QUAD (regno
))
18742 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
18743 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
18744 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
18745 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
18746 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
18751 if (TARGET_REALLY_IWMMXT
)
18753 if (IS_IWMMXT_GR_REGNUM (regno
))
18754 return mode
== SImode
;
18756 if (IS_IWMMXT_REGNUM (regno
))
18757 return VALID_IWMMXT_REG_MODE (mode
);
18760 /* We allow almost any value to be stored in the general registers.
18761 Restrict doubleword quantities to even register pairs so that we can
18762 use ldrd. Do not allow very large Neon structure opaque modes in
18763 general registers; they would use too many. */
18764 if (regno
<= LAST_ARM_REGNUM
)
18765 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
18766 && ARM_NUM_REGS (mode
) <= 4;
18768 if (regno
== FRAME_POINTER_REGNUM
18769 || regno
== ARG_POINTER_REGNUM
)
18770 /* We only allow integers in the fake hard registers. */
18771 return GET_MODE_CLASS (mode
) == MODE_INT
;
18776 /* Implement MODES_TIEABLE_P. */
18779 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18781 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
18784 /* We specifically want to allow elements of "structure" modes to
18785 be tieable to the structure. This more general condition allows
18786 other rarer situations too. */
18788 && (VALID_NEON_DREG_MODE (mode1
)
18789 || VALID_NEON_QREG_MODE (mode1
)
18790 || VALID_NEON_STRUCT_MODE (mode1
))
18791 && (VALID_NEON_DREG_MODE (mode2
)
18792 || VALID_NEON_QREG_MODE (mode2
)
18793 || VALID_NEON_STRUCT_MODE (mode2
)))
18799 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18800 not used in arm mode. */
18803 arm_regno_class (int regno
)
18807 if (regno
== STACK_POINTER_REGNUM
)
18809 if (regno
== CC_REGNUM
)
18816 if (TARGET_THUMB2
&& regno
< 8)
18819 if ( regno
<= LAST_ARM_REGNUM
18820 || regno
== FRAME_POINTER_REGNUM
18821 || regno
== ARG_POINTER_REGNUM
)
18822 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
18824 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
18825 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
18827 if (IS_VFP_REGNUM (regno
))
18829 if (regno
<= D7_VFP_REGNUM
)
18830 return VFP_D0_D7_REGS
;
18831 else if (regno
<= LAST_LO_VFP_REGNUM
)
18832 return VFP_LO_REGS
;
18834 return VFP_HI_REGS
;
18837 if (IS_IWMMXT_REGNUM (regno
))
18838 return IWMMXT_REGS
;
18840 if (IS_IWMMXT_GR_REGNUM (regno
))
18841 return IWMMXT_GR_REGS
;
18846 /* Handle a special case when computing the offset
18847 of an argument from the frame pointer. */
18849 arm_debugger_arg_offset (int value
, rtx addr
)
18853 /* We are only interested if dbxout_parms() failed to compute the offset. */
18857 /* We can only cope with the case where the address is held in a register. */
18861 /* If we are using the frame pointer to point at the argument, then
18862 an offset of 0 is correct. */
18863 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
18866 /* If we are using the stack pointer to point at the
18867 argument, then an offset of 0 is correct. */
18868 /* ??? Check this is consistent with thumb2 frame layout. */
18869 if ((TARGET_THUMB
|| !frame_pointer_needed
)
18870 && REGNO (addr
) == SP_REGNUM
)
18873 /* Oh dear. The argument is pointed to by a register rather
18874 than being held in a register, or being stored at a known
18875 offset from the frame pointer. Since GDB only understands
18876 those two kinds of argument we must translate the address
18877 held in the register into an offset from the frame pointer.
18878 We do this by searching through the insns for the function
18879 looking to see where this register gets its value. If the
18880 register is initialized from the frame pointer plus an offset
18881 then we are in luck and we can continue, otherwise we give up.
18883 This code is exercised by producing debugging information
18884 for a function with arguments like this:
18886 double func (double a, double b, int c, double d) {return d;}
18888 Without this code the stab for parameter 'd' will be set to
18889 an offset of 0 from the frame pointer, rather than 8. */
18891 /* The if() statement says:
18893 If the insn is a normal instruction
18894 and if the insn is setting the value in a register
18895 and if the register being set is the register holding the address of the argument
18896 and if the address is computing by an addition
18897 that involves adding to a register
18898 which is the frame pointer
18903 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
18905 if ( NONJUMP_INSN_P (insn
)
18906 && GET_CODE (PATTERN (insn
)) == SET
18907 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
18908 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
18909 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
18910 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18911 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
18914 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
18923 warning (0, "unable to compute real location of stacked parameter");
18924 value
= 8; /* XXX magic hack */
18944 T_MAX
/* Size of enum. Keep last. */
18945 } neon_builtin_type_mode
;
18947 #define TYPE_MODE_BIT(X) (1 << (X))
18949 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18950 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18951 | TYPE_MODE_BIT (T_DI))
18952 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18953 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18954 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18956 #define v8qi_UP T_V8QI
18957 #define v4hi_UP T_V4HI
18958 #define v2si_UP T_V2SI
18959 #define v2sf_UP T_V2SF
18961 #define v16qi_UP T_V16QI
18962 #define v8hi_UP T_V8HI
18963 #define v4si_UP T_V4SI
18964 #define v4sf_UP T_V4SF
18965 #define v2di_UP T_V2DI
18970 #define UP(X) X##_UP
19003 NEON_LOADSTRUCTLANE
,
19005 NEON_STORESTRUCTLANE
,
19014 const neon_itype itype
;
19015 const neon_builtin_type_mode mode
;
19016 const enum insn_code code
;
19017 unsigned int fcode
;
19018 } neon_builtin_datum
;
19020 #define CF(N,X) CODE_FOR_neon_##N##X
19022 #define VAR1(T, N, A) \
19023 {#N, NEON_##T, UP (A), CF (N, A), 0}
19024 #define VAR2(T, N, A, B) \
19026 {#N, NEON_##T, UP (B), CF (N, B), 0}
19027 #define VAR3(T, N, A, B, C) \
19028 VAR2 (T, N, A, B), \
19029 {#N, NEON_##T, UP (C), CF (N, C), 0}
19030 #define VAR4(T, N, A, B, C, D) \
19031 VAR3 (T, N, A, B, C), \
19032 {#N, NEON_##T, UP (D), CF (N, D), 0}
19033 #define VAR5(T, N, A, B, C, D, E) \
19034 VAR4 (T, N, A, B, C, D), \
19035 {#N, NEON_##T, UP (E), CF (N, E), 0}
19036 #define VAR6(T, N, A, B, C, D, E, F) \
19037 VAR5 (T, N, A, B, C, D, E), \
19038 {#N, NEON_##T, UP (F), CF (N, F), 0}
19039 #define VAR7(T, N, A, B, C, D, E, F, G) \
19040 VAR6 (T, N, A, B, C, D, E, F), \
19041 {#N, NEON_##T, UP (G), CF (N, G), 0}
19042 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19043 VAR7 (T, N, A, B, C, D, E, F, G), \
19044 {#N, NEON_##T, UP (H), CF (N, H), 0}
19045 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19046 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19047 {#N, NEON_##T, UP (I), CF (N, I), 0}
19048 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19049 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19050 {#N, NEON_##T, UP (J), CF (N, J), 0}
19052 /* The mode entries in the following table correspond to the "key" type of the
19053 instruction variant, i.e. equivalent to that which would be specified after
19054 the assembler mnemonic, which usually refers to the last vector operand.
19055 (Signed/unsigned/polynomial types are not differentiated between though, and
19056 are all mapped onto the same mode for a given element size.) The modes
19057 listed per instruction should be the same as those defined for that
19058 instruction's pattern in neon.md. */
19060 static neon_builtin_datum neon_builtin_data
[] =
19062 VAR10 (BINOP
, vadd
,
19063 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19064 VAR3 (BINOP
, vaddl
, v8qi
, v4hi
, v2si
),
19065 VAR3 (BINOP
, vaddw
, v8qi
, v4hi
, v2si
),
19066 VAR6 (BINOP
, vhadd
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19067 VAR8 (BINOP
, vqadd
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19068 VAR3 (BINOP
, vaddhn
, v8hi
, v4si
, v2di
),
19069 VAR8 (BINOP
, vmul
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19070 VAR8 (TERNOP
, vmla
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19071 VAR3 (TERNOP
, vmlal
, v8qi
, v4hi
, v2si
),
19072 VAR2 (TERNOP
, vfma
, v2sf
, v4sf
),
19073 VAR2 (TERNOP
, vfms
, v2sf
, v4sf
),
19074 VAR8 (TERNOP
, vmls
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19075 VAR3 (TERNOP
, vmlsl
, v8qi
, v4hi
, v2si
),
19076 VAR4 (BINOP
, vqdmulh
, v4hi
, v2si
, v8hi
, v4si
),
19077 VAR2 (TERNOP
, vqdmlal
, v4hi
, v2si
),
19078 VAR2 (TERNOP
, vqdmlsl
, v4hi
, v2si
),
19079 VAR3 (BINOP
, vmull
, v8qi
, v4hi
, v2si
),
19080 VAR2 (SCALARMULL
, vmull_n
, v4hi
, v2si
),
19081 VAR2 (LANEMULL
, vmull_lane
, v4hi
, v2si
),
19082 VAR2 (SCALARMULL
, vqdmull_n
, v4hi
, v2si
),
19083 VAR2 (LANEMULL
, vqdmull_lane
, v4hi
, v2si
),
19084 VAR4 (SCALARMULH
, vqdmulh_n
, v4hi
, v2si
, v8hi
, v4si
),
19085 VAR4 (LANEMULH
, vqdmulh_lane
, v4hi
, v2si
, v8hi
, v4si
),
19086 VAR2 (BINOP
, vqdmull
, v4hi
, v2si
),
19087 VAR8 (BINOP
, vshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19088 VAR8 (BINOP
, vqshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19089 VAR8 (SHIFTIMM
, vshr_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19090 VAR3 (SHIFTIMM
, vshrn_n
, v8hi
, v4si
, v2di
),
19091 VAR3 (SHIFTIMM
, vqshrn_n
, v8hi
, v4si
, v2di
),
19092 VAR3 (SHIFTIMM
, vqshrun_n
, v8hi
, v4si
, v2di
),
19093 VAR8 (SHIFTIMM
, vshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19094 VAR8 (SHIFTIMM
, vqshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19095 VAR8 (SHIFTIMM
, vqshlu_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19096 VAR3 (SHIFTIMM
, vshll_n
, v8qi
, v4hi
, v2si
),
19097 VAR8 (SHIFTACC
, vsra_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19098 VAR10 (BINOP
, vsub
,
19099 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19100 VAR3 (BINOP
, vsubl
, v8qi
, v4hi
, v2si
),
19101 VAR3 (BINOP
, vsubw
, v8qi
, v4hi
, v2si
),
19102 VAR8 (BINOP
, vqsub
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19103 VAR6 (BINOP
, vhsub
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19104 VAR3 (BINOP
, vsubhn
, v8hi
, v4si
, v2di
),
19105 VAR8 (BINOP
, vceq
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19106 VAR8 (BINOP
, vcge
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19107 VAR6 (BINOP
, vcgeu
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19108 VAR8 (BINOP
, vcgt
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19109 VAR6 (BINOP
, vcgtu
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19110 VAR2 (BINOP
, vcage
, v2sf
, v4sf
),
19111 VAR2 (BINOP
, vcagt
, v2sf
, v4sf
),
19112 VAR6 (BINOP
, vtst
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19113 VAR8 (BINOP
, vabd
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19114 VAR3 (BINOP
, vabdl
, v8qi
, v4hi
, v2si
),
19115 VAR6 (TERNOP
, vaba
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19116 VAR3 (TERNOP
, vabal
, v8qi
, v4hi
, v2si
),
19117 VAR8 (BINOP
, vmax
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19118 VAR8 (BINOP
, vmin
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19119 VAR4 (BINOP
, vpadd
, v8qi
, v4hi
, v2si
, v2sf
),
19120 VAR6 (UNOP
, vpaddl
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19121 VAR6 (BINOP
, vpadal
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19122 VAR4 (BINOP
, vpmax
, v8qi
, v4hi
, v2si
, v2sf
),
19123 VAR4 (BINOP
, vpmin
, v8qi
, v4hi
, v2si
, v2sf
),
19124 VAR2 (BINOP
, vrecps
, v2sf
, v4sf
),
19125 VAR2 (BINOP
, vrsqrts
, v2sf
, v4sf
),
19126 VAR8 (SHIFTINSERT
, vsri_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19127 VAR8 (SHIFTINSERT
, vsli_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
19128 VAR8 (UNOP
, vabs
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19129 VAR6 (UNOP
, vqabs
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19130 VAR8 (UNOP
, vneg
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19131 VAR6 (UNOP
, vqneg
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19132 VAR6 (UNOP
, vcls
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19133 VAR6 (UNOP
, vclz
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19134 VAR2 (UNOP
, vcnt
, v8qi
, v16qi
),
19135 VAR4 (UNOP
, vrecpe
, v2si
, v2sf
, v4si
, v4sf
),
19136 VAR4 (UNOP
, vrsqrte
, v2si
, v2sf
, v4si
, v4sf
),
19137 VAR6 (UNOP
, vmvn
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
19138 /* FIXME: vget_lane supports more variants than this! */
19139 VAR10 (GETLANE
, vget_lane
,
19140 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19141 VAR10 (SETLANE
, vset_lane
,
19142 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19143 VAR5 (CREATE
, vcreate
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19144 VAR10 (DUP
, vdup_n
,
19145 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19146 VAR10 (DUPLANE
, vdup_lane
,
19147 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19148 VAR5 (COMBINE
, vcombine
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19149 VAR5 (SPLIT
, vget_high
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19150 VAR5 (SPLIT
, vget_low
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19151 VAR3 (UNOP
, vmovn
, v8hi
, v4si
, v2di
),
19152 VAR3 (UNOP
, vqmovn
, v8hi
, v4si
, v2di
),
19153 VAR3 (UNOP
, vqmovun
, v8hi
, v4si
, v2di
),
19154 VAR3 (UNOP
, vmovl
, v8qi
, v4hi
, v2si
),
19155 VAR6 (LANEMUL
, vmul_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19156 VAR6 (LANEMAC
, vmla_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19157 VAR2 (LANEMAC
, vmlal_lane
, v4hi
, v2si
),
19158 VAR2 (LANEMAC
, vqdmlal_lane
, v4hi
, v2si
),
19159 VAR6 (LANEMAC
, vmls_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19160 VAR2 (LANEMAC
, vmlsl_lane
, v4hi
, v2si
),
19161 VAR2 (LANEMAC
, vqdmlsl_lane
, v4hi
, v2si
),
19162 VAR6 (SCALARMUL
, vmul_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19163 VAR6 (SCALARMAC
, vmla_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19164 VAR2 (SCALARMAC
, vmlal_n
, v4hi
, v2si
),
19165 VAR2 (SCALARMAC
, vqdmlal_n
, v4hi
, v2si
),
19166 VAR6 (SCALARMAC
, vmls_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19167 VAR2 (SCALARMAC
, vmlsl_n
, v4hi
, v2si
),
19168 VAR2 (SCALARMAC
, vqdmlsl_n
, v4hi
, v2si
),
19169 VAR10 (BINOP
, vext
,
19170 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19171 VAR8 (UNOP
, vrev64
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19172 VAR4 (UNOP
, vrev32
, v8qi
, v4hi
, v16qi
, v8hi
),
19173 VAR2 (UNOP
, vrev16
, v8qi
, v16qi
),
19174 VAR4 (CONVERT
, vcvt
, v2si
, v2sf
, v4si
, v4sf
),
19175 VAR4 (FIXCONV
, vcvt_n
, v2si
, v2sf
, v4si
, v4sf
),
19176 VAR10 (SELECT
, vbsl
,
19177 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19178 VAR1 (VTBL
, vtbl1
, v8qi
),
19179 VAR1 (VTBL
, vtbl2
, v8qi
),
19180 VAR1 (VTBL
, vtbl3
, v8qi
),
19181 VAR1 (VTBL
, vtbl4
, v8qi
),
19182 VAR1 (VTBX
, vtbx1
, v8qi
),
19183 VAR1 (VTBX
, vtbx2
, v8qi
),
19184 VAR1 (VTBX
, vtbx3
, v8qi
),
19185 VAR1 (VTBX
, vtbx4
, v8qi
),
19186 VAR8 (RESULTPAIR
, vtrn
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19187 VAR8 (RESULTPAIR
, vzip
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19188 VAR8 (RESULTPAIR
, vuzp
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
19189 VAR5 (REINTERP
, vreinterpretv8qi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19190 VAR5 (REINTERP
, vreinterpretv4hi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19191 VAR5 (REINTERP
, vreinterpretv2si
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19192 VAR5 (REINTERP
, vreinterpretv2sf
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19193 VAR5 (REINTERP
, vreinterpretdi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19194 VAR5 (REINTERP
, vreinterpretv16qi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19195 VAR5 (REINTERP
, vreinterpretv8hi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19196 VAR5 (REINTERP
, vreinterpretv4si
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19197 VAR5 (REINTERP
, vreinterpretv4sf
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19198 VAR5 (REINTERP
, vreinterpretv2di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19199 VAR10 (LOAD1
, vld1
,
19200 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19201 VAR10 (LOAD1LANE
, vld1_lane
,
19202 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19203 VAR10 (LOAD1
, vld1_dup
,
19204 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19205 VAR10 (STORE1
, vst1
,
19206 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19207 VAR10 (STORE1LANE
, vst1_lane
,
19208 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19210 vld2
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
19211 VAR7 (LOADSTRUCTLANE
, vld2_lane
,
19212 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19213 VAR5 (LOADSTRUCT
, vld2_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19214 VAR9 (STORESTRUCT
, vst2
,
19215 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
19216 VAR7 (STORESTRUCTLANE
, vst2_lane
,
19217 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19219 vld3
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
19220 VAR7 (LOADSTRUCTLANE
, vld3_lane
,
19221 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19222 VAR5 (LOADSTRUCT
, vld3_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19223 VAR9 (STORESTRUCT
, vst3
,
19224 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
19225 VAR7 (STORESTRUCTLANE
, vst3_lane
,
19226 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19227 VAR9 (LOADSTRUCT
, vld4
,
19228 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
19229 VAR7 (LOADSTRUCTLANE
, vld4_lane
,
19230 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19231 VAR5 (LOADSTRUCT
, vld4_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
19232 VAR9 (STORESTRUCT
, vst4
,
19233 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
19234 VAR7 (STORESTRUCTLANE
, vst4_lane
,
19235 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
19236 VAR10 (LOGICBINOP
, vand
,
19237 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19238 VAR10 (LOGICBINOP
, vorr
,
19239 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19240 VAR10 (BINOP
, veor
,
19241 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19242 VAR10 (LOGICBINOP
, vbic
,
19243 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
19244 VAR10 (LOGICBINOP
, vorn
,
19245 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
)
19260 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19261 symbolic names defined here (which would require too much duplication).
19265 ARM_BUILTIN_GETWCGR0
,
19266 ARM_BUILTIN_GETWCGR1
,
19267 ARM_BUILTIN_GETWCGR2
,
19268 ARM_BUILTIN_GETWCGR3
,
19270 ARM_BUILTIN_SETWCGR0
,
19271 ARM_BUILTIN_SETWCGR1
,
19272 ARM_BUILTIN_SETWCGR2
,
19273 ARM_BUILTIN_SETWCGR3
,
19277 ARM_BUILTIN_WAVG2BR
,
19278 ARM_BUILTIN_WAVG2HR
,
19279 ARM_BUILTIN_WAVG2B
,
19280 ARM_BUILTIN_WAVG2H
,
19287 ARM_BUILTIN_WMACSZ
,
19289 ARM_BUILTIN_WMACUZ
,
19292 ARM_BUILTIN_WSADBZ
,
19294 ARM_BUILTIN_WSADHZ
,
19296 ARM_BUILTIN_WALIGNI
,
19297 ARM_BUILTIN_WALIGNR0
,
19298 ARM_BUILTIN_WALIGNR1
,
19299 ARM_BUILTIN_WALIGNR2
,
19300 ARM_BUILTIN_WALIGNR3
,
19303 ARM_BUILTIN_TMIAPH
,
19304 ARM_BUILTIN_TMIABB
,
19305 ARM_BUILTIN_TMIABT
,
19306 ARM_BUILTIN_TMIATB
,
19307 ARM_BUILTIN_TMIATT
,
19309 ARM_BUILTIN_TMOVMSKB
,
19310 ARM_BUILTIN_TMOVMSKH
,
19311 ARM_BUILTIN_TMOVMSKW
,
19313 ARM_BUILTIN_TBCSTB
,
19314 ARM_BUILTIN_TBCSTH
,
19315 ARM_BUILTIN_TBCSTW
,
19317 ARM_BUILTIN_WMADDS
,
19318 ARM_BUILTIN_WMADDU
,
19320 ARM_BUILTIN_WPACKHSS
,
19321 ARM_BUILTIN_WPACKWSS
,
19322 ARM_BUILTIN_WPACKDSS
,
19323 ARM_BUILTIN_WPACKHUS
,
19324 ARM_BUILTIN_WPACKWUS
,
19325 ARM_BUILTIN_WPACKDUS
,
19330 ARM_BUILTIN_WADDSSB
,
19331 ARM_BUILTIN_WADDSSH
,
19332 ARM_BUILTIN_WADDSSW
,
19333 ARM_BUILTIN_WADDUSB
,
19334 ARM_BUILTIN_WADDUSH
,
19335 ARM_BUILTIN_WADDUSW
,
19339 ARM_BUILTIN_WSUBSSB
,
19340 ARM_BUILTIN_WSUBSSH
,
19341 ARM_BUILTIN_WSUBSSW
,
19342 ARM_BUILTIN_WSUBUSB
,
19343 ARM_BUILTIN_WSUBUSH
,
19344 ARM_BUILTIN_WSUBUSW
,
19351 ARM_BUILTIN_WCMPEQB
,
19352 ARM_BUILTIN_WCMPEQH
,
19353 ARM_BUILTIN_WCMPEQW
,
19354 ARM_BUILTIN_WCMPGTUB
,
19355 ARM_BUILTIN_WCMPGTUH
,
19356 ARM_BUILTIN_WCMPGTUW
,
19357 ARM_BUILTIN_WCMPGTSB
,
19358 ARM_BUILTIN_WCMPGTSH
,
19359 ARM_BUILTIN_WCMPGTSW
,
19361 ARM_BUILTIN_TEXTRMSB
,
19362 ARM_BUILTIN_TEXTRMSH
,
19363 ARM_BUILTIN_TEXTRMSW
,
19364 ARM_BUILTIN_TEXTRMUB
,
19365 ARM_BUILTIN_TEXTRMUH
,
19366 ARM_BUILTIN_TEXTRMUW
,
19367 ARM_BUILTIN_TINSRB
,
19368 ARM_BUILTIN_TINSRH
,
19369 ARM_BUILTIN_TINSRW
,
19371 ARM_BUILTIN_WMAXSW
,
19372 ARM_BUILTIN_WMAXSH
,
19373 ARM_BUILTIN_WMAXSB
,
19374 ARM_BUILTIN_WMAXUW
,
19375 ARM_BUILTIN_WMAXUH
,
19376 ARM_BUILTIN_WMAXUB
,
19377 ARM_BUILTIN_WMINSW
,
19378 ARM_BUILTIN_WMINSH
,
19379 ARM_BUILTIN_WMINSB
,
19380 ARM_BUILTIN_WMINUW
,
19381 ARM_BUILTIN_WMINUH
,
19382 ARM_BUILTIN_WMINUB
,
19384 ARM_BUILTIN_WMULUM
,
19385 ARM_BUILTIN_WMULSM
,
19386 ARM_BUILTIN_WMULUL
,
19388 ARM_BUILTIN_PSADBH
,
19389 ARM_BUILTIN_WSHUFH
,
19403 ARM_BUILTIN_WSLLHI
,
19404 ARM_BUILTIN_WSLLWI
,
19405 ARM_BUILTIN_WSLLDI
,
19406 ARM_BUILTIN_WSRAHI
,
19407 ARM_BUILTIN_WSRAWI
,
19408 ARM_BUILTIN_WSRADI
,
19409 ARM_BUILTIN_WSRLHI
,
19410 ARM_BUILTIN_WSRLWI
,
19411 ARM_BUILTIN_WSRLDI
,
19412 ARM_BUILTIN_WRORHI
,
19413 ARM_BUILTIN_WRORWI
,
19414 ARM_BUILTIN_WRORDI
,
19416 ARM_BUILTIN_WUNPCKIHB
,
19417 ARM_BUILTIN_WUNPCKIHH
,
19418 ARM_BUILTIN_WUNPCKIHW
,
19419 ARM_BUILTIN_WUNPCKILB
,
19420 ARM_BUILTIN_WUNPCKILH
,
19421 ARM_BUILTIN_WUNPCKILW
,
19423 ARM_BUILTIN_WUNPCKEHSB
,
19424 ARM_BUILTIN_WUNPCKEHSH
,
19425 ARM_BUILTIN_WUNPCKEHSW
,
19426 ARM_BUILTIN_WUNPCKEHUB
,
19427 ARM_BUILTIN_WUNPCKEHUH
,
19428 ARM_BUILTIN_WUNPCKEHUW
,
19429 ARM_BUILTIN_WUNPCKELSB
,
19430 ARM_BUILTIN_WUNPCKELSH
,
19431 ARM_BUILTIN_WUNPCKELSW
,
19432 ARM_BUILTIN_WUNPCKELUB
,
19433 ARM_BUILTIN_WUNPCKELUH
,
19434 ARM_BUILTIN_WUNPCKELUW
,
19440 ARM_BUILTIN_WADDSUBHX
,
19441 ARM_BUILTIN_WSUBADDHX
,
19443 ARM_BUILTIN_WABSDIFFB
,
19444 ARM_BUILTIN_WABSDIFFH
,
19445 ARM_BUILTIN_WABSDIFFW
,
19447 ARM_BUILTIN_WADDCH
,
19448 ARM_BUILTIN_WADDCW
,
19451 ARM_BUILTIN_WAVG4R
,
19453 ARM_BUILTIN_WMADDSX
,
19454 ARM_BUILTIN_WMADDUX
,
19456 ARM_BUILTIN_WMADDSN
,
19457 ARM_BUILTIN_WMADDUN
,
19459 ARM_BUILTIN_WMULWSM
,
19460 ARM_BUILTIN_WMULWUM
,
19462 ARM_BUILTIN_WMULWSMR
,
19463 ARM_BUILTIN_WMULWUMR
,
19465 ARM_BUILTIN_WMULWL
,
19467 ARM_BUILTIN_WMULSMR
,
19468 ARM_BUILTIN_WMULUMR
,
19470 ARM_BUILTIN_WQMULM
,
19471 ARM_BUILTIN_WQMULMR
,
19473 ARM_BUILTIN_WQMULWM
,
19474 ARM_BUILTIN_WQMULWMR
,
19476 ARM_BUILTIN_WADDBHUSM
,
19477 ARM_BUILTIN_WADDBHUSL
,
19479 ARM_BUILTIN_WQMIABB
,
19480 ARM_BUILTIN_WQMIABT
,
19481 ARM_BUILTIN_WQMIATB
,
19482 ARM_BUILTIN_WQMIATT
,
19484 ARM_BUILTIN_WQMIABBN
,
19485 ARM_BUILTIN_WQMIABTN
,
19486 ARM_BUILTIN_WQMIATBN
,
19487 ARM_BUILTIN_WQMIATTN
,
19489 ARM_BUILTIN_WMIABB
,
19490 ARM_BUILTIN_WMIABT
,
19491 ARM_BUILTIN_WMIATB
,
19492 ARM_BUILTIN_WMIATT
,
19494 ARM_BUILTIN_WMIABBN
,
19495 ARM_BUILTIN_WMIABTN
,
19496 ARM_BUILTIN_WMIATBN
,
19497 ARM_BUILTIN_WMIATTN
,
19499 ARM_BUILTIN_WMIAWBB
,
19500 ARM_BUILTIN_WMIAWBT
,
19501 ARM_BUILTIN_WMIAWTB
,
19502 ARM_BUILTIN_WMIAWTT
,
19504 ARM_BUILTIN_WMIAWBBN
,
19505 ARM_BUILTIN_WMIAWBTN
,
19506 ARM_BUILTIN_WMIAWTBN
,
19507 ARM_BUILTIN_WMIAWTTN
,
19509 ARM_BUILTIN_WMERGE
,
19511 ARM_BUILTIN_NEON_BASE
,
19513 ARM_BUILTIN_MAX
= ARM_BUILTIN_NEON_BASE
+ ARRAY_SIZE (neon_builtin_data
)
19516 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
19519 arm_init_neon_builtins (void)
19521 unsigned int i
, fcode
;
19524 tree neon_intQI_type_node
;
19525 tree neon_intHI_type_node
;
19526 tree neon_polyQI_type_node
;
19527 tree neon_polyHI_type_node
;
19528 tree neon_intSI_type_node
;
19529 tree neon_intDI_type_node
;
19530 tree neon_float_type_node
;
19532 tree intQI_pointer_node
;
19533 tree intHI_pointer_node
;
19534 tree intSI_pointer_node
;
19535 tree intDI_pointer_node
;
19536 tree float_pointer_node
;
19538 tree const_intQI_node
;
19539 tree const_intHI_node
;
19540 tree const_intSI_node
;
19541 tree const_intDI_node
;
19542 tree const_float_node
;
19544 tree const_intQI_pointer_node
;
19545 tree const_intHI_pointer_node
;
19546 tree const_intSI_pointer_node
;
19547 tree const_intDI_pointer_node
;
19548 tree const_float_pointer_node
;
19550 tree V8QI_type_node
;
19551 tree V4HI_type_node
;
19552 tree V2SI_type_node
;
19553 tree V2SF_type_node
;
19554 tree V16QI_type_node
;
19555 tree V8HI_type_node
;
19556 tree V4SI_type_node
;
19557 tree V4SF_type_node
;
19558 tree V2DI_type_node
;
19560 tree intUQI_type_node
;
19561 tree intUHI_type_node
;
19562 tree intUSI_type_node
;
19563 tree intUDI_type_node
;
19565 tree intEI_type_node
;
19566 tree intOI_type_node
;
19567 tree intCI_type_node
;
19568 tree intXI_type_node
;
19570 tree V8QI_pointer_node
;
19571 tree V4HI_pointer_node
;
19572 tree V2SI_pointer_node
;
19573 tree V2SF_pointer_node
;
19574 tree V16QI_pointer_node
;
19575 tree V8HI_pointer_node
;
19576 tree V4SI_pointer_node
;
19577 tree V4SF_pointer_node
;
19578 tree V2DI_pointer_node
;
19580 tree void_ftype_pv8qi_v8qi_v8qi
;
19581 tree void_ftype_pv4hi_v4hi_v4hi
;
19582 tree void_ftype_pv2si_v2si_v2si
;
19583 tree void_ftype_pv2sf_v2sf_v2sf
;
19584 tree void_ftype_pdi_di_di
;
19585 tree void_ftype_pv16qi_v16qi_v16qi
;
19586 tree void_ftype_pv8hi_v8hi_v8hi
;
19587 tree void_ftype_pv4si_v4si_v4si
;
19588 tree void_ftype_pv4sf_v4sf_v4sf
;
19589 tree void_ftype_pv2di_v2di_v2di
;
19591 tree reinterp_ftype_dreg
[5][5];
19592 tree reinterp_ftype_qreg
[5][5];
19593 tree dreg_types
[5], qreg_types
[5];
19595 /* Create distinguished type nodes for NEON vector element types,
19596 and pointers to values of such types, so we can detect them later. */
19597 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
19598 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
19599 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
19600 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
19601 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
19602 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
19603 neon_float_type_node
= make_node (REAL_TYPE
);
19604 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
19605 layout_type (neon_float_type_node
);
19607 /* Define typedefs which exactly correspond to the modes we are basing vector
19608 types on. If you change these names you'll need to change
19609 the table used by arm_mangle_type too. */
19610 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
19611 "__builtin_neon_qi");
19612 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
19613 "__builtin_neon_hi");
19614 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
19615 "__builtin_neon_si");
19616 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
19617 "__builtin_neon_sf");
19618 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
19619 "__builtin_neon_di");
19620 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
19621 "__builtin_neon_poly8");
19622 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
19623 "__builtin_neon_poly16");
19625 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
19626 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
19627 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
19628 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
19629 float_pointer_node
= build_pointer_type (neon_float_type_node
);
19631 /* Next create constant-qualified versions of the above types. */
19632 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
19634 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
19636 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
19638 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
19640 const_float_node
= build_qualified_type (neon_float_type_node
,
19643 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
19644 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
19645 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
19646 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
19647 const_float_pointer_node
= build_pointer_type (const_float_node
);
19649 /* Now create vector types based on our NEON element types. */
19650 /* 64-bit vectors. */
19652 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
19654 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
19656 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
19658 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
19659 /* 128-bit vectors. */
19661 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
19663 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
19665 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
19667 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
19669 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
19671 /* Unsigned integer types for various mode sizes. */
19672 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
19673 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
19674 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
19675 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
19677 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
19678 "__builtin_neon_uqi");
19679 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
19680 "__builtin_neon_uhi");
19681 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
19682 "__builtin_neon_usi");
19683 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
19684 "__builtin_neon_udi");
19686 /* Opaque integer types for structures of vectors. */
19687 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
19688 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
19689 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
19690 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
19692 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
19693 "__builtin_neon_ti");
19694 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
19695 "__builtin_neon_ei");
19696 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
19697 "__builtin_neon_oi");
19698 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
19699 "__builtin_neon_ci");
19700 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
19701 "__builtin_neon_xi");
19703 /* Pointers to vector types. */
19704 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
19705 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
19706 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
19707 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
19708 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
19709 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
19710 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
19711 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
19712 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
19714 /* Operations which return results as pairs. */
19715 void_ftype_pv8qi_v8qi_v8qi
=
19716 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
19717 V8QI_type_node
, NULL
);
19718 void_ftype_pv4hi_v4hi_v4hi
=
19719 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
19720 V4HI_type_node
, NULL
);
19721 void_ftype_pv2si_v2si_v2si
=
19722 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
19723 V2SI_type_node
, NULL
);
19724 void_ftype_pv2sf_v2sf_v2sf
=
19725 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
19726 V2SF_type_node
, NULL
);
19727 void_ftype_pdi_di_di
=
19728 build_function_type_list (void_type_node
, intDI_pointer_node
,
19729 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
19730 void_ftype_pv16qi_v16qi_v16qi
=
19731 build_function_type_list (void_type_node
, V16QI_pointer_node
,
19732 V16QI_type_node
, V16QI_type_node
, NULL
);
19733 void_ftype_pv8hi_v8hi_v8hi
=
19734 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
19735 V8HI_type_node
, NULL
);
19736 void_ftype_pv4si_v4si_v4si
=
19737 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
19738 V4SI_type_node
, NULL
);
19739 void_ftype_pv4sf_v4sf_v4sf
=
19740 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
19741 V4SF_type_node
, NULL
);
19742 void_ftype_pv2di_v2di_v2di
=
19743 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
19744 V2DI_type_node
, NULL
);
19746 dreg_types
[0] = V8QI_type_node
;
19747 dreg_types
[1] = V4HI_type_node
;
19748 dreg_types
[2] = V2SI_type_node
;
19749 dreg_types
[3] = V2SF_type_node
;
19750 dreg_types
[4] = neon_intDI_type_node
;
19752 qreg_types
[0] = V16QI_type_node
;
19753 qreg_types
[1] = V8HI_type_node
;
19754 qreg_types
[2] = V4SI_type_node
;
19755 qreg_types
[3] = V4SF_type_node
;
19756 qreg_types
[4] = V2DI_type_node
;
19758 for (i
= 0; i
< 5; i
++)
19761 for (j
= 0; j
< 5; j
++)
19763 reinterp_ftype_dreg
[i
][j
]
19764 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
19765 reinterp_ftype_qreg
[i
][j
]
19766 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
19770 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
19771 i
< ARRAY_SIZE (neon_builtin_data
);
19774 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
19776 const char* const modenames
[] = {
19777 "v8qi", "v4hi", "v2si", "v2sf", "di",
19778 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19783 int is_load
= 0, is_store
= 0;
19785 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
19792 case NEON_LOAD1LANE
:
19793 case NEON_LOADSTRUCT
:
19794 case NEON_LOADSTRUCTLANE
:
19796 /* Fall through. */
19798 case NEON_STORE1LANE
:
19799 case NEON_STORESTRUCT
:
19800 case NEON_STORESTRUCTLANE
:
19803 /* Fall through. */
19806 case NEON_LOGICBINOP
:
19807 case NEON_SHIFTINSERT
:
19814 case NEON_SHIFTIMM
:
19815 case NEON_SHIFTACC
:
19821 case NEON_LANEMULL
:
19822 case NEON_LANEMULH
:
19824 case NEON_SCALARMUL
:
19825 case NEON_SCALARMULL
:
19826 case NEON_SCALARMULH
:
19827 case NEON_SCALARMAC
:
19833 tree return_type
= void_type_node
, args
= void_list_node
;
19835 /* Build a function type directly from the insn_data for
19836 this builtin. The build_function_type() function takes
19837 care of removing duplicates for us. */
19838 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
19842 if (is_load
&& k
== 1)
19844 /* Neon load patterns always have the memory
19845 operand in the operand 1 position. */
19846 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
19847 == neon_struct_operand
);
19853 eltype
= const_intQI_pointer_node
;
19858 eltype
= const_intHI_pointer_node
;
19863 eltype
= const_intSI_pointer_node
;
19868 eltype
= const_float_pointer_node
;
19873 eltype
= const_intDI_pointer_node
;
19876 default: gcc_unreachable ();
19879 else if (is_store
&& k
== 0)
19881 /* Similarly, Neon store patterns use operand 0 as
19882 the memory location to store to. */
19883 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
19884 == neon_struct_operand
);
19890 eltype
= intQI_pointer_node
;
19895 eltype
= intHI_pointer_node
;
19900 eltype
= intSI_pointer_node
;
19905 eltype
= float_pointer_node
;
19910 eltype
= intDI_pointer_node
;
19913 default: gcc_unreachable ();
19918 switch (insn_data
[d
->code
].operand
[k
].mode
)
19920 case VOIDmode
: eltype
= void_type_node
; break;
19922 case QImode
: eltype
= neon_intQI_type_node
; break;
19923 case HImode
: eltype
= neon_intHI_type_node
; break;
19924 case SImode
: eltype
= neon_intSI_type_node
; break;
19925 case SFmode
: eltype
= neon_float_type_node
; break;
19926 case DImode
: eltype
= neon_intDI_type_node
; break;
19927 case TImode
: eltype
= intTI_type_node
; break;
19928 case EImode
: eltype
= intEI_type_node
; break;
19929 case OImode
: eltype
= intOI_type_node
; break;
19930 case CImode
: eltype
= intCI_type_node
; break;
19931 case XImode
: eltype
= intXI_type_node
; break;
19932 /* 64-bit vectors. */
19933 case V8QImode
: eltype
= V8QI_type_node
; break;
19934 case V4HImode
: eltype
= V4HI_type_node
; break;
19935 case V2SImode
: eltype
= V2SI_type_node
; break;
19936 case V2SFmode
: eltype
= V2SF_type_node
; break;
19937 /* 128-bit vectors. */
19938 case V16QImode
: eltype
= V16QI_type_node
; break;
19939 case V8HImode
: eltype
= V8HI_type_node
; break;
19940 case V4SImode
: eltype
= V4SI_type_node
; break;
19941 case V4SFmode
: eltype
= V4SF_type_node
; break;
19942 case V2DImode
: eltype
= V2DI_type_node
; break;
19943 default: gcc_unreachable ();
19947 if (k
== 0 && !is_store
)
19948 return_type
= eltype
;
19950 args
= tree_cons (NULL_TREE
, eltype
, args
);
19953 ftype
= build_function_type (return_type
, args
);
19957 case NEON_RESULTPAIR
:
19959 switch (insn_data
[d
->code
].operand
[1].mode
)
19961 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
19962 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
19963 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
19964 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
19965 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
19966 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
19967 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
19968 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
19969 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
19970 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
19971 default: gcc_unreachable ();
19976 case NEON_REINTERP
:
19978 /* We iterate over 5 doubleword types, then 5 quadword
19980 int rhs
= d
->mode
% 5;
19981 switch (insn_data
[d
->code
].operand
[0].mode
)
19983 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
19984 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
19985 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
19986 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
19987 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
19988 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
19989 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
19990 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
19991 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
19992 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
19993 default: gcc_unreachable ();
19999 gcc_unreachable ();
20002 gcc_assert (ftype
!= NULL
);
20004 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
20006 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
20008 arm_builtin_decls
[fcode
] = decl
;
/* Register builtin NAME with function type TYPE and function code CODE,
   but only when the architecture feature bits in MASK are enabled in
   insn_flags; the resulting decl is recorded in arm_builtin_decls[CODE]
   so it can be looked up by code later.
   NOTE(review): this chunk is a garbled extraction — the macro's
   surrounding brace/declaration lines (original lines 20013-20017 and
   20021-20022, including the declaration of `bdecl`) were dropped, so
   the continuation lines below are preserved byte-for-byte rather than
   reconstructed.  */
20012 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
20015 if ((MASK) & insn_flags) \
20018 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
20019 BUILT_IN_MD, NULL, NULL_TREE); \
20020 arm_builtin_decls[CODE] = bdecl; \
/* Table-driven descriptor for a single iWMMXt/iWMMXt2 builtin; used as
   the element type of the bdesc_2arg and bdesc_1arg tables below.
   NOTE(review): the extraction dropped this struct's brace lines
   (original lines 20026/20033), so the field text below is annotated
   in place without reconstructing them.  */
20025 struct builtin_description
/* Feature bits (FL_IWMMXT / FL_IWMMXT2) that must be present in
   insn_flags for the builtin to be registered (see def_mbuiltin).  */
20027 const unsigned int mask
;
/* Insn pattern used to expand this builtin.  */
20028 const enum insn_code icode
;
/* User-visible name ("__builtin_arm_" prefixed); NULL in the
   *_BUILTIN2 table entries.  */
20029 const char * const name
;
/* Corresponding ARM_BUILTIN_* function code.  */
20030 const enum arm_builtins code
;
/* RTX comparison code; UNKNOWN in every entry visible in this chunk.  */
20031 const enum rtx_code comparison
;
/* Extra flag word; 0 in every entry visible in this chunk.  */
20032 const unsigned int flag
;
/* Table of two-operand iWMMXt/iWMMXt2 builtins, registered via
   def_mbuiltin from arm_init_iwmmxt_builtins.  IWMMXT_BUILTIN and
   IWMMXT2_BUILTIN entries get a "__builtin_arm_"-prefixed user name
   gated on FL_IWMMXT / FL_IWMMXT2 respectively; the *_BUILTIN2
   variants carry a NULL name — presumably these are expanded
   specially rather than exposed directly; confirm against the full
   file.
   NOTE(review): this chunk is a garbled extraction — the array's
   surrounding brace lines were dropped; entry text below is preserved
   byte-for-byte.  */
20035 static const struct builtin_description bdesc_2arg
[] =
20037 #define IWMMXT_BUILTIN(code, string, builtin) \
20038 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
20039 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20041 #define IWMMXT2_BUILTIN(code, string, builtin) \
20042 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
20043 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20045 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
20046 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
20047 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
20048 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
20049 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
20050 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
20051 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
20052 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
20053 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
20054 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
20055 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
20056 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
20057 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
20058 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
20059 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
20060 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
20061 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
20062 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
20063 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
20064 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
20065 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
20066 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
20067 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
20068 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
20069 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
20070 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
20071 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
20072 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
20073 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
20074 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
20075 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
20076 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
20077 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
20078 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
20079 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
20080 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
20081 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
20082 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
20083 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
20084 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
20085 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
20086 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
20087 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
20088 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
20089 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
20090 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
20091 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
20092 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
20093 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
20094 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
20095 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
20096 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
20097 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
20098 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
20099 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
20100 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
20101 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
20102 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
20103 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
20104 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
20105 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
20106 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
20107 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
20108 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
20109 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
20110 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
20111 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
20112 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
20113 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
20114 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
20115 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
20116 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
20117 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
20118 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
20119 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
20120 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
20121 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
20122 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
20124 #define IWMMXT_BUILTIN2(code, builtin) \
20125 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20127 #define IWMMXT2_BUILTIN2(code, builtin) \
20128 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20130 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
20131 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
20132 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
20133 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
20134 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
20135 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
20136 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
20137 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
20138 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
20139 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
/* Table of one-operand iWMMXt/iWMMXt2 builtins, registered via
   def_mbuiltin from arm_init_iwmmxt_builtins; reuses the
   IWMMXT_BUILTIN / IWMMXT2_BUILTIN macros defined for bdesc_2arg.
   NOTE(review): this chunk is a garbled extraction — the array's
   surrounding brace lines were dropped; entry text below is preserved
   byte-for-byte.  */
20142 static const struct builtin_description bdesc_1arg
[] =
20144 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
20145 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
20146 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
20147 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
20148 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
20149 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
20150 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
20151 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
20152 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
20153 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
20154 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
20155 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
20156 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
20157 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
20158 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
20159 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
20160 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
20161 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
20162 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
20163 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
20164 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
20165 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
20166 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
20167 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
20170 /* Set up all the iWMMXt builtins. This is not called if
20171 TARGET_IWMMXT is zero. */
20174 arm_init_iwmmxt_builtins (void)
20176 const struct builtin_description
* d
;
20179 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
20180 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
20181 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
20183 tree v8qi_ftype_v8qi_v8qi_int
20184 = build_function_type_list (V8QI_type_node
,
20185 V8QI_type_node
, V8QI_type_node
,
20186 integer_type_node
, NULL_TREE
);
20187 tree v4hi_ftype_v4hi_int
20188 = build_function_type_list (V4HI_type_node
,
20189 V4HI_type_node
, integer_type_node
, NULL_TREE
);
20190 tree v2si_ftype_v2si_int
20191 = build_function_type_list (V2SI_type_node
,
20192 V2SI_type_node
, integer_type_node
, NULL_TREE
);
20193 tree v2si_ftype_di_di
20194 = build_function_type_list (V2SI_type_node
,
20195 long_long_integer_type_node
,
20196 long_long_integer_type_node
,
20198 tree di_ftype_di_int
20199 = build_function_type_list (long_long_integer_type_node
,
20200 long_long_integer_type_node
,
20201 integer_type_node
, NULL_TREE
);
20202 tree di_ftype_di_int_int
20203 = build_function_type_list (long_long_integer_type_node
,
20204 long_long_integer_type_node
,
20206 integer_type_node
, NULL_TREE
);
20207 tree int_ftype_v8qi
20208 = build_function_type_list (integer_type_node
,
20209 V8QI_type_node
, NULL_TREE
);
20210 tree int_ftype_v4hi
20211 = build_function_type_list (integer_type_node
,
20212 V4HI_type_node
, NULL_TREE
);
20213 tree int_ftype_v2si
20214 = build_function_type_list (integer_type_node
,
20215 V2SI_type_node
, NULL_TREE
);
20216 tree int_ftype_v8qi_int
20217 = build_function_type_list (integer_type_node
,
20218 V8QI_type_node
, integer_type_node
, NULL_TREE
);
20219 tree int_ftype_v4hi_int
20220 = build_function_type_list (integer_type_node
,
20221 V4HI_type_node
, integer_type_node
, NULL_TREE
);
20222 tree int_ftype_v2si_int
20223 = build_function_type_list (integer_type_node
,
20224 V2SI_type_node
, integer_type_node
, NULL_TREE
);
20225 tree v8qi_ftype_v8qi_int_int
20226 = build_function_type_list (V8QI_type_node
,
20227 V8QI_type_node
, integer_type_node
,
20228 integer_type_node
, NULL_TREE
);
20229 tree v4hi_ftype_v4hi_int_int
20230 = build_function_type_list (V4HI_type_node
,
20231 V4HI_type_node
, integer_type_node
,
20232 integer_type_node
, NULL_TREE
);
20233 tree v2si_ftype_v2si_int_int
20234 = build_function_type_list (V2SI_type_node
,
20235 V2SI_type_node
, integer_type_node
,
20236 integer_type_node
, NULL_TREE
);
20237 /* Miscellaneous. */
20238 tree v8qi_ftype_v4hi_v4hi
20239 = build_function_type_list (V8QI_type_node
,
20240 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
20241 tree v4hi_ftype_v2si_v2si
20242 = build_function_type_list (V4HI_type_node
,
20243 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
20244 tree v8qi_ftype_v4hi_v8qi
20245 = build_function_type_list (V8QI_type_node
,
20246 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
20247 tree v2si_ftype_v4hi_v4hi
20248 = build_function_type_list (V2SI_type_node
,
20249 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
20250 tree v2si_ftype_v8qi_v8qi
20251 = build_function_type_list (V2SI_type_node
,
20252 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
20253 tree v4hi_ftype_v4hi_di
20254 = build_function_type_list (V4HI_type_node
,
20255 V4HI_type_node
, long_long_integer_type_node
,
20257 tree v2si_ftype_v2si_di
20258 = build_function_type_list (V2SI_type_node
,
20259 V2SI_type_node
, long_long_integer_type_node
,
20262 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
20263 tree int_ftype_void
20264 = build_function_type_list (integer_type_node
, NULL_TREE
);
20266 = build_function_type_list (long_long_integer_type_node
,
20267 V8QI_type_node
, NULL_TREE
);
20269 = build_function_type_list (long_long_integer_type_node
,
20270 V4HI_type_node
, NULL_TREE
);
20272 = build_function_type_list (long_long_integer_type_node
,
20273 V2SI_type_node
, NULL_TREE
);
20274 tree v2si_ftype_v4hi
20275 = build_function_type_list (V2SI_type_node
,
20276 V4HI_type_node
, NULL_TREE
);
20277 tree v4hi_ftype_v8qi
20278 = build_function_type_list (V4HI_type_node
,
20279 V8QI_type_node
, NULL_TREE
);
20280 tree v8qi_ftype_v8qi
20281 = build_function_type_list (V8QI_type_node
,
20282 V8QI_type_node
, NULL_TREE
);
20283 tree v4hi_ftype_v4hi
20284 = build_function_type_list (V4HI_type_node
,
20285 V4HI_type_node
, NULL_TREE
);
20286 tree v2si_ftype_v2si
20287 = build_function_type_list (V2SI_type_node
,
20288 V2SI_type_node
, NULL_TREE
);
20290 tree di_ftype_di_v4hi_v4hi
20291 = build_function_type_list (long_long_unsigned_type_node
,
20292 long_long_unsigned_type_node
,
20293 V4HI_type_node
, V4HI_type_node
,
20296 tree di_ftype_v4hi_v4hi
20297 = build_function_type_list (long_long_unsigned_type_node
,
20298 V4HI_type_node
,V4HI_type_node
,
20301 tree v2si_ftype_v2si_v4hi_v4hi
20302 = build_function_type_list (V2SI_type_node
,
20303 V2SI_type_node
, V4HI_type_node
,
20304 V4HI_type_node
, NULL_TREE
);
20306 tree v2si_ftype_v2si_v8qi_v8qi
20307 = build_function_type_list (V2SI_type_node
,
20308 V2SI_type_node
, V8QI_type_node
,
20309 V8QI_type_node
, NULL_TREE
);
20311 tree di_ftype_di_v2si_v2si
20312 = build_function_type_list (long_long_unsigned_type_node
,
20313 long_long_unsigned_type_node
,
20314 V2SI_type_node
, V2SI_type_node
,
20317 tree di_ftype_di_di_int
20318 = build_function_type_list (long_long_unsigned_type_node
,
20319 long_long_unsigned_type_node
,
20320 long_long_unsigned_type_node
,
20321 integer_type_node
, NULL_TREE
);
20323 tree void_ftype_int
20324 = build_function_type_list (void_type_node
,
20325 integer_type_node
, NULL_TREE
);
20327 tree v8qi_ftype_char
20328 = build_function_type_list (V8QI_type_node
,
20329 signed_char_type_node
, NULL_TREE
);
20331 tree v4hi_ftype_short
20332 = build_function_type_list (V4HI_type_node
,
20333 short_integer_type_node
, NULL_TREE
);
20335 tree v2si_ftype_int
20336 = build_function_type_list (V2SI_type_node
,
20337 integer_type_node
, NULL_TREE
);
20339 /* Normal vector binops. */
20340 tree v8qi_ftype_v8qi_v8qi
20341 = build_function_type_list (V8QI_type_node
,
20342 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
20343 tree v4hi_ftype_v4hi_v4hi
20344 = build_function_type_list (V4HI_type_node
,
20345 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
20346 tree v2si_ftype_v2si_v2si
20347 = build_function_type_list (V2SI_type_node
,
20348 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
20349 tree di_ftype_di_di
20350 = build_function_type_list (long_long_unsigned_type_node
,
20351 long_long_unsigned_type_node
,
20352 long_long_unsigned_type_node
,
20355 /* Add all builtins that are more or less simple operations on two
20357 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
20359 /* Use one of the operands; the target can have a different mode for
20360 mask-generating compares. */
20361 enum machine_mode mode
;
20367 mode
= insn_data
[d
->icode
].operand
[1].mode
;
20372 type
= v8qi_ftype_v8qi_v8qi
;
20375 type
= v4hi_ftype_v4hi_v4hi
;
20378 type
= v2si_ftype_v2si_v2si
;
20381 type
= di_ftype_di_di
;
20385 gcc_unreachable ();
20388 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
20391 /* Add the remaining MMX insns with somewhat more complicated types. */
20392 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20393 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20394 ARM_BUILTIN_ ## CODE)
20396 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
20397 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
20398 ARM_BUILTIN_ ## CODE)
20400 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
20401 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
20402 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
20403 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
20404 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
20405 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
20406 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
20407 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
20408 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
20410 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
20411 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
20412 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
20413 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
20414 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
20415 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
20417 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
20418 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
20419 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
20420 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
20421 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
20422 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
20424 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
20425 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
20426 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
20427 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
20428 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
20429 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
20431 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
20432 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
20433 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
20434 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
20435 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
20436 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
20438 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
20440 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
20441 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
20442 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
20443 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
20444 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
20445 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
20446 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
20447 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
20448 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
20449 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
20451 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
20452 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
20453 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
20454 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
20455 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
20456 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
20457 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
20458 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
20459 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
20461 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
20462 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
20463 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
20465 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
20466 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
20467 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
20469 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
20470 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
20472 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
20473 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
20474 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
20475 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
20476 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
20477 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
20479 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
20480 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
20481 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
20482 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
20483 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
20484 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
20485 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
20486 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
20487 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
20488 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
20489 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
20490 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
20492 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
20493 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
20494 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
20495 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
20497 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
20498 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
20499 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
20500 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
20501 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
20502 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
20503 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
20505 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
20506 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
20507 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
20509 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
20510 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
20511 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
20512 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
20514 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
20515 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
20516 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
20517 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
20519 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
20520 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
20521 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
20522 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
20524 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
20525 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
20526 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
20527 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
20529 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
20530 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
20531 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
20532 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
20534 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
20535 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
20536 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
20537 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
20539 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
20541 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
20542 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
20543 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
20545 #undef iwmmx_mbuiltin
20546 #undef iwmmx2_mbuiltin
20550 arm_init_fp16_builtins (void)
20552 tree fp16_type
= make_node (REAL_TYPE
);
20553 TYPE_PRECISION (fp16_type
) = 16;
20554 layout_type (fp16_type
);
20555 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
20559 arm_init_builtins (void)
20561 if (TARGET_REALLY_IWMMXT
)
20562 arm_init_iwmmxt_builtins ();
20565 arm_init_neon_builtins ();
20567 if (arm_fp16_format
)
20568 arm_init_fp16_builtins ();
20571 /* Return the ARM builtin for CODE. */
20574 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
20576 if (code
>= ARM_BUILTIN_MAX
)
20577 return error_mark_node
;
20579 return arm_builtin_decls
[code
];
20582 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20584 static const char *
20585 arm_invalid_parameter_type (const_tree t
)
20587 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
20588 return N_("function parameters cannot have __fp16 type");
20592 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20594 static const char *
20595 arm_invalid_return_type (const_tree t
)
20597 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
20598 return N_("functions cannot return __fp16 type");
20602 /* Implement TARGET_PROMOTED_TYPE. */
20605 arm_promoted_type (const_tree t
)
20607 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
20608 return float_type_node
;
20612 /* Implement TARGET_CONVERT_TO_TYPE.
20613 Specifically, this hook implements the peculiarity of the ARM
20614 half-precision floating-point C semantics that requires conversions between
20615 __fp16 to or from double to do an intermediate conversion to float. */
20618 arm_convert_to_type (tree type
, tree expr
)
20620 tree fromtype
= TREE_TYPE (expr
);
20621 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
20623 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
20624 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
20625 return convert (type
, convert (float_type_node
, expr
));
20629 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20630 This simply adds HFmode as a supported mode; even though we don't
20631 implement arithmetic on this type directly, it's supported by
20632 optabs conversions, much the way the double-word arithmetic is
20633 special-cased in the default hook. */
20636 arm_scalar_mode_supported_p (enum machine_mode mode
)
20638 if (mode
== HFmode
)
20639 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
20640 else if (ALL_FIXED_POINT_MODE_P (mode
))
20643 return default_scalar_mode_supported_p (mode
);
20646 /* Errors in the source file can cause expand_expr to return const0_rtx
20647 where we expect a vector. To avoid crashing, use one of the vector
20648 clear instructions. */
20651 safe_vector_operand (rtx x
, enum machine_mode mode
)
20653 if (x
!= const0_rtx
)
20655 x
= gen_reg_rtx (mode
);
20657 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
20658 : gen_rtx_SUBREG (DImode
, x
, 0)));
20662 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20665 arm_expand_binop_builtin (enum insn_code icode
,
20666 tree exp
, rtx target
)
20669 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20670 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20671 rtx op0
= expand_normal (arg0
);
20672 rtx op1
= expand_normal (arg1
);
20673 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20674 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
20675 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
20677 if (VECTOR_MODE_P (mode0
))
20678 op0
= safe_vector_operand (op0
, mode0
);
20679 if (VECTOR_MODE_P (mode1
))
20680 op1
= safe_vector_operand (op1
, mode1
);
20683 || GET_MODE (target
) != tmode
20684 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20685 target
= gen_reg_rtx (tmode
);
20687 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
20688 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
20690 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20691 op0
= copy_to_mode_reg (mode0
, op0
);
20692 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20693 op1
= copy_to_mode_reg (mode1
, op1
);
20695 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20702 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20705 arm_expand_unop_builtin (enum insn_code icode
,
20706 tree exp
, rtx target
, int do_load
)
20709 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20710 rtx op0
= expand_normal (arg0
);
20711 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20712 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
20715 || GET_MODE (target
) != tmode
20716 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20717 target
= gen_reg_rtx (tmode
);
20719 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
20722 if (VECTOR_MODE_P (mode0
))
20723 op0
= safe_vector_operand (op0
, mode0
);
20725 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20726 op0
= copy_to_mode_reg (mode0
, op0
);
20729 pat
= GEN_FCN (icode
) (target
, op0
);
20737 NEON_ARG_COPY_TO_REG
,
20743 #define NEON_MAX_BUILTIN_ARGS 5
20745 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20746 and return an expression for the accessed memory.
20748 The intrinsic function operates on a block of registers that has
20749 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
20750 function references the memory at EXP of type TYPE and in mode
20751 MEM_MODE; this mode may be BLKmode if no more suitable mode is
20755 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
20756 enum machine_mode reg_mode
,
20757 neon_builtin_type_mode type_mode
)
20759 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
20760 tree elem_type
, upper_bound
, array_type
;
20762 /* Work out the size of the register block in bytes. */
20763 reg_size
= GET_MODE_SIZE (reg_mode
);
20765 /* Work out the size of each vector in bytes. */
20766 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
20767 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
20769 /* Work out how many vectors there are. */
20770 gcc_assert (reg_size
% vector_size
== 0);
20771 nvectors
= reg_size
/ vector_size
;
20773 /* Work out the type of each element. */
20774 gcc_assert (POINTER_TYPE_P (type
));
20775 elem_type
= TREE_TYPE (type
);
20777 /* Work out how many elements are being loaded or stored.
20778 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20779 and memory elements; anything else implies a lane load or store. */
20780 if (mem_mode
== reg_mode
)
20781 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
20785 /* Create a type that describes the full access. */
20786 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
20787 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
20789 /* Dereference EXP using that type. */
20790 return fold_build2 (MEM_REF
, array_type
, exp
,
20791 build_int_cst (build_pointer_type (array_type
), 0));
20794 /* Expand a Neon builtin. */
20796 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
20797 neon_builtin_type_mode type_mode
,
20798 tree exp
, int fcode
, ...)
20802 tree arg
[NEON_MAX_BUILTIN_ARGS
];
20803 rtx op
[NEON_MAX_BUILTIN_ARGS
];
20806 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20807 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
20808 enum machine_mode other_mode
;
20814 || GET_MODE (target
) != tmode
20815 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
20816 target
= gen_reg_rtx (tmode
);
20818 va_start (ap
, fcode
);
20820 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
20824 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
20826 if (thisarg
== NEON_ARG_STOP
)
20830 opno
= argc
+ have_retval
;
20831 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
20832 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
20833 arg_type
= TREE_VALUE (formals
);
20834 if (thisarg
== NEON_ARG_MEMORY
)
20836 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
20837 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
20838 mode
[argc
], other_mode
,
20842 op
[argc
] = expand_normal (arg
[argc
]);
20846 case NEON_ARG_COPY_TO_REG
:
20847 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20848 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
20849 (op
[argc
], mode
[argc
]))
20850 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
20853 case NEON_ARG_CONSTANT
:
20854 /* FIXME: This error message is somewhat unhelpful. */
20855 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
20856 (op
[argc
], mode
[argc
]))
20857 error ("argument must be a constant");
20860 case NEON_ARG_MEMORY
:
20861 gcc_assert (MEM_P (op
[argc
]));
20862 PUT_MODE (op
[argc
], mode
[argc
]);
20863 /* ??? arm_neon.h uses the same built-in functions for signed
20864 and unsigned accesses, casting where necessary. This isn't
20866 set_mem_alias_set (op
[argc
], 0);
20867 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
20868 (op
[argc
], mode
[argc
]))
20869 op
[argc
] = (replace_equiv_address
20870 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
20873 case NEON_ARG_STOP
:
20874 gcc_unreachable ();
20878 formals
= TREE_CHAIN (formals
);
20888 pat
= GEN_FCN (icode
) (target
, op
[0]);
20892 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
20896 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
20900 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
20904 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
20908 gcc_unreachable ();
20914 pat
= GEN_FCN (icode
) (op
[0]);
20918 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
20922 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
20926 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
20930 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
20934 gcc_unreachable ();
20945 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20946 constants defined per-instruction or per instruction-variant. Instead, the
20947 required info is looked up in the table neon_builtin_data. */
20949 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
20951 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
20952 neon_itype itype
= d
->itype
;
20953 enum insn_code icode
= d
->code
;
20954 neon_builtin_type_mode type_mode
= d
->mode
;
20961 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20962 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20966 case NEON_SCALARMUL
:
20967 case NEON_SCALARMULL
:
20968 case NEON_SCALARMULH
:
20969 case NEON_SHIFTINSERT
:
20970 case NEON_LOGICBINOP
:
20971 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20972 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20976 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20977 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
20978 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20982 case NEON_SHIFTIMM
:
20983 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20984 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
20988 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20989 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
20993 case NEON_REINTERP
:
20994 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20995 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
20999 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21000 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
21002 case NEON_RESULTPAIR
:
21003 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
21004 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
21008 case NEON_LANEMULL
:
21009 case NEON_LANEMULH
:
21010 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21011 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
21012 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
21015 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21016 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
21017 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
21019 case NEON_SHIFTACC
:
21020 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21021 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
21022 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
21024 case NEON_SCALARMAC
:
21025 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21026 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
21027 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
21031 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21032 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
21036 case NEON_LOADSTRUCT
:
21037 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21038 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
21040 case NEON_LOAD1LANE
:
21041 case NEON_LOADSTRUCTLANE
:
21042 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
21043 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
21047 case NEON_STORESTRUCT
:
21048 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
21049 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
21051 case NEON_STORE1LANE
:
21052 case NEON_STORESTRUCTLANE
:
21053 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
21054 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
21058 gcc_unreachable ();
21061 /* Emit code to reinterpret one Neon type as another, without altering bits. */
21063 neon_reinterpret (rtx dest
, rtx src
)
21065 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
21068 /* Emit code to place a Neon pair result in memory locations (with equal
21071 neon_emit_pair_result_insn (enum machine_mode mode
,
21072 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
21075 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
21076 rtx tmp1
= gen_reg_rtx (mode
);
21077 rtx tmp2
= gen_reg_rtx (mode
);
21079 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
21081 emit_move_insn (mem
, tmp1
);
21082 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
21083 emit_move_insn (mem
, tmp2
);
21086 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
21087 not to early-clobber SRC registers in the process.
21089 We assume that the operands described by SRC and DEST represent a
21090 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
21091 number of components into which the copy has been decomposed. */
21093 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
21097 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
21098 || REGNO (operands
[0]) < REGNO (operands
[1]))
21100 for (i
= 0; i
< count
; i
++)
21102 operands
[2 * i
] = dest
[i
];
21103 operands
[2 * i
+ 1] = src
[i
];
21108 for (i
= 0; i
< count
; i
++)
21110 operands
[2 * i
] = dest
[count
- i
- 1];
21111 operands
[2 * i
+ 1] = src
[count
- i
- 1];
21116 /* Split operands into moves from op[1] + op[2] into op[0]. */
21119 neon_split_vcombine (rtx operands
[3])
21121 unsigned int dest
= REGNO (operands
[0]);
21122 unsigned int src1
= REGNO (operands
[1]);
21123 unsigned int src2
= REGNO (operands
[2]);
21124 enum machine_mode halfmode
= GET_MODE (operands
[1]);
21125 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
21126 rtx destlo
, desthi
;
21128 if (src1
== dest
&& src2
== dest
+ halfregs
)
21130 /* No-op move. Can't split to nothing; emit something. */
21131 emit_note (NOTE_INSN_DELETED
);
21135 /* Preserve register attributes for variable tracking. */
21136 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
21137 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
21138 GET_MODE_SIZE (halfmode
));
21140 /* Special case of reversed high/low parts. Use VSWP. */
21141 if (src2
== dest
&& src1
== dest
+ halfregs
)
21143 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
21144 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
21145 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
21149 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
21151 /* Try to avoid unnecessary moves if part of the result
21152 is in the right place already. */
21154 emit_move_insn (destlo
, operands
[1]);
21155 if (src2
!= dest
+ halfregs
)
21156 emit_move_insn (desthi
, operands
[2]);
21160 if (src2
!= dest
+ halfregs
)
21161 emit_move_insn (desthi
, operands
[2]);
21163 emit_move_insn (destlo
, operands
[1]);
21167 /* Expand an expression EXP that calls a built-in function,
21168 with result going to TARGET if that's convenient
21169 (and in mode MODE if that's convenient).
21170 SUBTARGET may be used as the target for computing one of EXP's operands.
21171 IGNORE is nonzero if the value is to be ignored. */
21174 arm_expand_builtin (tree exp
,
21176 rtx subtarget ATTRIBUTE_UNUSED
,
21177 enum machine_mode mode ATTRIBUTE_UNUSED
,
21178 int ignore ATTRIBUTE_UNUSED
)
21180 const struct builtin_description
* d
;
21181 enum insn_code icode
;
21182 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
21190 int fcode
= DECL_FUNCTION_CODE (fndecl
);
21192 enum machine_mode tmode
;
21193 enum machine_mode mode0
;
21194 enum machine_mode mode1
;
21195 enum machine_mode mode2
;
21201 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
21202 return arm_expand_neon_builtin (fcode
, exp
, target
);
21206 case ARM_BUILTIN_TEXTRMSB
:
21207 case ARM_BUILTIN_TEXTRMUB
:
21208 case ARM_BUILTIN_TEXTRMSH
:
21209 case ARM_BUILTIN_TEXTRMUH
:
21210 case ARM_BUILTIN_TEXTRMSW
:
21211 case ARM_BUILTIN_TEXTRMUW
:
21212 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
21213 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
21214 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
21215 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
21216 : CODE_FOR_iwmmxt_textrmw
);
21218 arg0
= CALL_EXPR_ARG (exp
, 0);
21219 arg1
= CALL_EXPR_ARG (exp
, 1);
21220 op0
= expand_normal (arg0
);
21221 op1
= expand_normal (arg1
);
21222 tmode
= insn_data
[icode
].operand
[0].mode
;
21223 mode0
= insn_data
[icode
].operand
[1].mode
;
21224 mode1
= insn_data
[icode
].operand
[2].mode
;
21226 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21227 op0
= copy_to_mode_reg (mode0
, op0
);
21228 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21230 /* @@@ better error message */
21231 error ("selector must be an immediate");
21232 return gen_reg_rtx (tmode
);
21235 opint
= INTVAL (op1
);
21236 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
21238 if (opint
> 7 || opint
< 0)
21239 error ("the range of selector should be in 0 to 7");
21241 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
21243 if (opint
> 3 || opint
< 0)
21244 error ("the range of selector should be in 0 to 3");
21246 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
21248 if (opint
> 1 || opint
< 0)
21249 error ("the range of selector should be in 0 to 1");
21253 || GET_MODE (target
) != tmode
21254 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21255 target
= gen_reg_rtx (tmode
);
21256 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
21262 case ARM_BUILTIN_WALIGNI
:
21263 /* If op2 is immediate, call walighi, else call walighr. */
21264 arg0
= CALL_EXPR_ARG (exp
, 0);
21265 arg1
= CALL_EXPR_ARG (exp
, 1);
21266 arg2
= CALL_EXPR_ARG (exp
, 2);
21267 op0
= expand_normal (arg0
);
21268 op1
= expand_normal (arg1
);
21269 op2
= expand_normal (arg2
);
21270 if (CONST_INT_P (op2
))
21272 icode
= CODE_FOR_iwmmxt_waligni
;
21273 tmode
= insn_data
[icode
].operand
[0].mode
;
21274 mode0
= insn_data
[icode
].operand
[1].mode
;
21275 mode1
= insn_data
[icode
].operand
[2].mode
;
21276 mode2
= insn_data
[icode
].operand
[3].mode
;
21277 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21278 op0
= copy_to_mode_reg (mode0
, op0
);
21279 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21280 op1
= copy_to_mode_reg (mode1
, op1
);
21281 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
21282 selector
= INTVAL (op2
);
21283 if (selector
> 7 || selector
< 0)
21284 error ("the range of selector should be in 0 to 7");
21288 icode
= CODE_FOR_iwmmxt_walignr
;
21289 tmode
= insn_data
[icode
].operand
[0].mode
;
21290 mode0
= insn_data
[icode
].operand
[1].mode
;
21291 mode1
= insn_data
[icode
].operand
[2].mode
;
21292 mode2
= insn_data
[icode
].operand
[3].mode
;
21293 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21294 op0
= copy_to_mode_reg (mode0
, op0
);
21295 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21296 op1
= copy_to_mode_reg (mode1
, op1
);
21297 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
21298 op2
= copy_to_mode_reg (mode2
, op2
);
21301 || GET_MODE (target
) != tmode
21302 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21303 target
= gen_reg_rtx (tmode
);
21304 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21310 case ARM_BUILTIN_TINSRB
:
21311 case ARM_BUILTIN_TINSRH
:
21312 case ARM_BUILTIN_TINSRW
:
21313 case ARM_BUILTIN_WMERGE
:
21314 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
21315 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
21316 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
21317 : CODE_FOR_iwmmxt_tinsrw
);
21318 arg0
= CALL_EXPR_ARG (exp
, 0);
21319 arg1
= CALL_EXPR_ARG (exp
, 1);
21320 arg2
= CALL_EXPR_ARG (exp
, 2);
21321 op0
= expand_normal (arg0
);
21322 op1
= expand_normal (arg1
);
21323 op2
= expand_normal (arg2
);
21324 tmode
= insn_data
[icode
].operand
[0].mode
;
21325 mode0
= insn_data
[icode
].operand
[1].mode
;
21326 mode1
= insn_data
[icode
].operand
[2].mode
;
21327 mode2
= insn_data
[icode
].operand
[3].mode
;
21329 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21330 op0
= copy_to_mode_reg (mode0
, op0
);
21331 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21332 op1
= copy_to_mode_reg (mode1
, op1
);
21333 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
21335 error ("selector must be an immediate");
21338 if (icode
== CODE_FOR_iwmmxt_wmerge
)
21340 selector
= INTVAL (op2
);
21341 if (selector
> 7 || selector
< 0)
21342 error ("the range of selector should be in 0 to 7");
21344 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
21345 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
21346 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
21349 selector
= INTVAL (op2
);
21350 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
21351 error ("the range of selector should be in 0 to 7");
21352 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
21353 error ("the range of selector should be in 0 to 3");
21354 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
21355 error ("the range of selector should be in 0 to 1");
21357 op2
= GEN_INT (mask
);
21360 || GET_MODE (target
) != tmode
21361 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21362 target
= gen_reg_rtx (tmode
);
21363 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21369 case ARM_BUILTIN_SETWCGR0
:
21370 case ARM_BUILTIN_SETWCGR1
:
21371 case ARM_BUILTIN_SETWCGR2
:
21372 case ARM_BUILTIN_SETWCGR3
:
21373 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
21374 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
21375 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
21376 : CODE_FOR_iwmmxt_setwcgr3
);
21377 arg0
= CALL_EXPR_ARG (exp
, 0);
21378 op0
= expand_normal (arg0
);
21379 mode0
= insn_data
[icode
].operand
[0].mode
;
21380 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
21381 op0
= copy_to_mode_reg (mode0
, op0
);
21382 pat
= GEN_FCN (icode
) (op0
);
21388 case ARM_BUILTIN_GETWCGR0
:
21389 case ARM_BUILTIN_GETWCGR1
:
21390 case ARM_BUILTIN_GETWCGR2
:
21391 case ARM_BUILTIN_GETWCGR3
:
21392 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
21393 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
21394 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
21395 : CODE_FOR_iwmmxt_getwcgr3
);
21396 tmode
= insn_data
[icode
].operand
[0].mode
;
21398 || GET_MODE (target
) != tmode
21399 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21400 target
= gen_reg_rtx (tmode
);
21401 pat
= GEN_FCN (icode
) (target
);
21407 case ARM_BUILTIN_WSHUFH
:
21408 icode
= CODE_FOR_iwmmxt_wshufh
;
21409 arg0
= CALL_EXPR_ARG (exp
, 0);
21410 arg1
= CALL_EXPR_ARG (exp
, 1);
21411 op0
= expand_normal (arg0
);
21412 op1
= expand_normal (arg1
);
21413 tmode
= insn_data
[icode
].operand
[0].mode
;
21414 mode1
= insn_data
[icode
].operand
[1].mode
;
21415 mode2
= insn_data
[icode
].operand
[2].mode
;
21417 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21418 op0
= copy_to_mode_reg (mode1
, op0
);
21419 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21421 error ("mask must be an immediate");
21424 selector
= INTVAL (op1
);
21425 if (selector
< 0 || selector
> 255)
21426 error ("the range of mask should be in 0 to 255");
21428 || GET_MODE (target
) != tmode
21429 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21430 target
= gen_reg_rtx (tmode
);
21431 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
21437 case ARM_BUILTIN_WMADDS
:
21438 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
21439 case ARM_BUILTIN_WMADDSX
:
21440 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
21441 case ARM_BUILTIN_WMADDSN
:
21442 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
21443 case ARM_BUILTIN_WMADDU
:
21444 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
21445 case ARM_BUILTIN_WMADDUX
:
21446 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
21447 case ARM_BUILTIN_WMADDUN
:
21448 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
21449 case ARM_BUILTIN_WSADBZ
:
21450 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
21451 case ARM_BUILTIN_WSADHZ
:
21452 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
21454 /* Several three-argument builtins. */
21455 case ARM_BUILTIN_WMACS
:
21456 case ARM_BUILTIN_WMACU
:
21457 case ARM_BUILTIN_TMIA
:
21458 case ARM_BUILTIN_TMIAPH
:
21459 case ARM_BUILTIN_TMIATT
:
21460 case ARM_BUILTIN_TMIATB
:
21461 case ARM_BUILTIN_TMIABT
:
21462 case ARM_BUILTIN_TMIABB
:
21463 case ARM_BUILTIN_WQMIABB
:
21464 case ARM_BUILTIN_WQMIABT
:
21465 case ARM_BUILTIN_WQMIATB
:
21466 case ARM_BUILTIN_WQMIATT
:
21467 case ARM_BUILTIN_WQMIABBN
:
21468 case ARM_BUILTIN_WQMIABTN
:
21469 case ARM_BUILTIN_WQMIATBN
:
21470 case ARM_BUILTIN_WQMIATTN
:
21471 case ARM_BUILTIN_WMIABB
:
21472 case ARM_BUILTIN_WMIABT
:
21473 case ARM_BUILTIN_WMIATB
:
21474 case ARM_BUILTIN_WMIATT
:
21475 case ARM_BUILTIN_WMIABBN
:
21476 case ARM_BUILTIN_WMIABTN
:
21477 case ARM_BUILTIN_WMIATBN
:
21478 case ARM_BUILTIN_WMIATTN
:
21479 case ARM_BUILTIN_WMIAWBB
:
21480 case ARM_BUILTIN_WMIAWBT
:
21481 case ARM_BUILTIN_WMIAWTB
:
21482 case ARM_BUILTIN_WMIAWTT
:
21483 case ARM_BUILTIN_WMIAWBBN
:
21484 case ARM_BUILTIN_WMIAWBTN
:
21485 case ARM_BUILTIN_WMIAWTBN
:
21486 case ARM_BUILTIN_WMIAWTTN
:
21487 case ARM_BUILTIN_WSADB
:
21488 case ARM_BUILTIN_WSADH
:
21489 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
21490 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
21491 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
21492 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
21493 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
21494 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
21495 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
21496 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
21497 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
21498 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
21499 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
21500 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
21501 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
21502 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
21503 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
21504 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
21505 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
21506 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
21507 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
21508 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
21509 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
21510 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
21511 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
21512 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
21513 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
21514 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
21515 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
21516 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
21517 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
21518 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
21519 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
21520 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
21521 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
21522 : CODE_FOR_iwmmxt_wsadh
);
21523 arg0
= CALL_EXPR_ARG (exp
, 0);
21524 arg1
= CALL_EXPR_ARG (exp
, 1);
21525 arg2
= CALL_EXPR_ARG (exp
, 2);
21526 op0
= expand_normal (arg0
);
21527 op1
= expand_normal (arg1
);
21528 op2
= expand_normal (arg2
);
21529 tmode
= insn_data
[icode
].operand
[0].mode
;
21530 mode0
= insn_data
[icode
].operand
[1].mode
;
21531 mode1
= insn_data
[icode
].operand
[2].mode
;
21532 mode2
= insn_data
[icode
].operand
[3].mode
;
21534 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21535 op0
= copy_to_mode_reg (mode0
, op0
);
21536 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21537 op1
= copy_to_mode_reg (mode1
, op1
);
21538 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
21539 op2
= copy_to_mode_reg (mode2
, op2
);
21541 || GET_MODE (target
) != tmode
21542 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21543 target
= gen_reg_rtx (tmode
);
21544 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21550 case ARM_BUILTIN_WZERO
:
21551 target
= gen_reg_rtx (DImode
);
21552 emit_insn (gen_iwmmxt_clrdi (target
));
21555 case ARM_BUILTIN_WSRLHI
:
21556 case ARM_BUILTIN_WSRLWI
:
21557 case ARM_BUILTIN_WSRLDI
:
21558 case ARM_BUILTIN_WSLLHI
:
21559 case ARM_BUILTIN_WSLLWI
:
21560 case ARM_BUILTIN_WSLLDI
:
21561 case ARM_BUILTIN_WSRAHI
:
21562 case ARM_BUILTIN_WSRAWI
:
21563 case ARM_BUILTIN_WSRADI
:
21564 case ARM_BUILTIN_WRORHI
:
21565 case ARM_BUILTIN_WRORWI
:
21566 case ARM_BUILTIN_WRORDI
:
21567 case ARM_BUILTIN_WSRLH
:
21568 case ARM_BUILTIN_WSRLW
:
21569 case ARM_BUILTIN_WSRLD
:
21570 case ARM_BUILTIN_WSLLH
:
21571 case ARM_BUILTIN_WSLLW
:
21572 case ARM_BUILTIN_WSLLD
:
21573 case ARM_BUILTIN_WSRAH
:
21574 case ARM_BUILTIN_WSRAW
:
21575 case ARM_BUILTIN_WSRAD
:
21576 case ARM_BUILTIN_WRORH
:
21577 case ARM_BUILTIN_WRORW
:
21578 case ARM_BUILTIN_WRORD
:
21579 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
21580 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
21581 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
21582 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
21583 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
21584 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
21585 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
21586 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
21587 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
21588 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
21589 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
21590 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
21591 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
21592 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
21593 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
21594 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
21595 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
21596 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
21597 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
21598 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
21599 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
21600 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
21601 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
21602 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
21603 : CODE_FOR_nothing
);
21604 arg1
= CALL_EXPR_ARG (exp
, 1);
21605 op1
= expand_normal (arg1
);
21606 if (GET_MODE (op1
) == VOIDmode
)
21608 imm
= INTVAL (op1
);
21609 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
21610 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
21611 && (imm
< 0 || imm
> 32))
21613 if (fcode
== ARM_BUILTIN_WRORHI
)
21614 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21615 else if (fcode
== ARM_BUILTIN_WRORWI
)
21616 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21617 else if (fcode
== ARM_BUILTIN_WRORH
)
21618 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21620 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21622 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
21623 && (imm
< 0 || imm
> 64))
21625 if (fcode
== ARM_BUILTIN_WRORDI
)
21626 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21628 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21632 if (fcode
== ARM_BUILTIN_WSRLHI
)
21633 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21634 else if (fcode
== ARM_BUILTIN_WSRLWI
)
21635 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21636 else if (fcode
== ARM_BUILTIN_WSRLDI
)
21637 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21638 else if (fcode
== ARM_BUILTIN_WSLLHI
)
21639 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21640 else if (fcode
== ARM_BUILTIN_WSLLWI
)
21641 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21642 else if (fcode
== ARM_BUILTIN_WSLLDI
)
21643 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21644 else if (fcode
== ARM_BUILTIN_WSRAHI
)
21645 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21646 else if (fcode
== ARM_BUILTIN_WSRAWI
)
21647 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21648 else if (fcode
== ARM_BUILTIN_WSRADI
)
21649 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21650 else if (fcode
== ARM_BUILTIN_WSRLH
)
21651 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21652 else if (fcode
== ARM_BUILTIN_WSRLW
)
21653 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21654 else if (fcode
== ARM_BUILTIN_WSRLD
)
21655 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21656 else if (fcode
== ARM_BUILTIN_WSLLH
)
21657 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21658 else if (fcode
== ARM_BUILTIN_WSLLW
)
21659 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21660 else if (fcode
== ARM_BUILTIN_WSLLD
)
21661 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21662 else if (fcode
== ARM_BUILTIN_WSRAH
)
21663 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21664 else if (fcode
== ARM_BUILTIN_WSRAW
)
21665 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21667 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21670 return arm_expand_binop_builtin (icode
, exp
, target
);
21676 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
21677 if (d
->code
== (const enum arm_builtins
) fcode
)
21678 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
21680 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
21681 if (d
->code
== (const enum arm_builtins
) fcode
)
21682 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
21684 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  MASK must be nonzero;
   the result for MASK == 0 is whatever ctz_hwi returns for zero.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
21697 /* Like emit_multi_reg_push, but allowing for a different set of
21698 registers to be described as saved. MASK is the set of registers
21699 to be saved; REAL_REGS is the set of registers to be described as
21700 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21703 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
21705 unsigned long regno
;
21706 rtx par
[10], tmp
, reg
, insn
;
21709 /* Build the parallel of the registers actually being stored. */
21710 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
21712 regno
= ctz_hwi (mask
);
21713 reg
= gen_rtx_REG (SImode
, regno
);
21716 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
21718 tmp
= gen_rtx_USE (VOIDmode
, reg
);
21723 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
21724 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
21725 tmp
= gen_frame_mem (BLKmode
, tmp
);
21726 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
21729 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
21730 insn
= emit_insn (tmp
);
21732 /* Always build the stack adjustment note for unwind info. */
21733 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
21734 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
21737 /* Build the parallel of the registers recorded as saved for unwind. */
21738 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
21740 regno
= ctz_hwi (real_regs
);
21741 reg
= gen_rtx_REG (SImode
, regno
);
21743 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
21744 tmp
= gen_frame_mem (SImode
, tmp
);
21745 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
21746 RTX_FRAME_RELATED_P (tmp
) = 1;
21754 RTX_FRAME_RELATED_P (par
[0]) = 1;
21755 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
21758 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
21763 /* Emit code to push or pop registers to or from the stack. F is the
21764 assembly file. MASK is the registers to pop. */
21766 thumb_pop (FILE *f
, unsigned long mask
)
21769 int lo_mask
= mask
& 0xFF;
21770 int pushed_words
= 0;
21774 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
21776 /* Special case. Do not generate a POP PC statement here, do it in
21778 thumb_exit (f
, -1);
21782 fprintf (f
, "\tpop\t{");
21784 /* Look at the low registers first. */
21785 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
21789 asm_fprintf (f
, "%r", regno
);
21791 if ((lo_mask
& ~1) != 0)
21798 if (mask
& (1 << PC_REGNUM
))
21800 /* Catch popping the PC. */
21801 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
21802 || crtl
->calls_eh_return
)
21804 /* The PC is never poped directly, instead
21805 it is popped into r3 and then BX is used. */
21806 fprintf (f
, "}\n");
21808 thumb_exit (f
, -1);
21817 asm_fprintf (f
, "%r", PC_REGNUM
);
21821 fprintf (f
, "}\n");
21824 /* Generate code to return from a thumb function.
21825 If 'reg_containing_return_addr' is -1, then the return address is
21826 actually on the stack, at the stack pointer. */
21828 thumb_exit (FILE *f
, int reg_containing_return_addr
)
21830 unsigned regs_available_for_popping
;
21831 unsigned regs_to_pop
;
21833 unsigned available
;
21837 int restore_a4
= FALSE
;
21839 /* Compute the registers we need to pop. */
21843 if (reg_containing_return_addr
== -1)
21845 regs_to_pop
|= 1 << LR_REGNUM
;
21849 if (TARGET_BACKTRACE
)
21851 /* Restore the (ARM) frame pointer and stack pointer. */
21852 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
21856 /* If there is nothing to pop then just emit the BX instruction and
21858 if (pops_needed
== 0)
21860 if (crtl
->calls_eh_return
)
21861 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
21863 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
21866 /* Otherwise if we are not supporting interworking and we have not created
21867 a backtrace structure and the function was not entered in ARM mode then
21868 just pop the return address straight into the PC. */
21869 else if (!TARGET_INTERWORK
21870 && !TARGET_BACKTRACE
21871 && !is_called_in_ARM_mode (current_function_decl
)
21872 && !crtl
->calls_eh_return
)
21874 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
21878 /* Find out how many of the (return) argument registers we can corrupt. */
21879 regs_available_for_popping
= 0;
21881 /* If returning via __builtin_eh_return, the bottom three registers
21882 all contain information needed for the return. */
21883 if (crtl
->calls_eh_return
)
21887 /* If we can deduce the registers used from the function's
21888 return value. This is more reliable that examining
21889 df_regs_ever_live_p () because that will be set if the register is
21890 ever used in the function, not just if the register is used
21891 to hold a return value. */
21893 if (crtl
->return_rtx
!= 0)
21894 mode
= GET_MODE (crtl
->return_rtx
);
21896 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
21898 size
= GET_MODE_SIZE (mode
);
21902 /* In a void function we can use any argument register.
21903 In a function that returns a structure on the stack
21904 we can use the second and third argument registers. */
21905 if (mode
== VOIDmode
)
21906 regs_available_for_popping
=
21907 (1 << ARG_REGISTER (1))
21908 | (1 << ARG_REGISTER (2))
21909 | (1 << ARG_REGISTER (3));
21911 regs_available_for_popping
=
21912 (1 << ARG_REGISTER (2))
21913 | (1 << ARG_REGISTER (3));
21915 else if (size
<= 4)
21916 regs_available_for_popping
=
21917 (1 << ARG_REGISTER (2))
21918 | (1 << ARG_REGISTER (3));
21919 else if (size
<= 8)
21920 regs_available_for_popping
=
21921 (1 << ARG_REGISTER (3));
21924 /* Match registers to be popped with registers into which we pop them. */
21925 for (available
= regs_available_for_popping
,
21926 required
= regs_to_pop
;
21927 required
!= 0 && available
!= 0;
21928 available
&= ~(available
& - available
),
21929 required
&= ~(required
& - required
))
21932 /* If we have any popping registers left over, remove them. */
21934 regs_available_for_popping
&= ~available
;
21936 /* Otherwise if we need another popping register we can use
21937 the fourth argument register. */
21938 else if (pops_needed
)
21940 /* If we have not found any free argument registers and
21941 reg a4 contains the return address, we must move it. */
21942 if (regs_available_for_popping
== 0
21943 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
21945 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
21946 reg_containing_return_addr
= LR_REGNUM
;
21948 else if (size
> 12)
21950 /* Register a4 is being used to hold part of the return value,
21951 but we have dire need of a free, low register. */
21954 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
21957 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
21959 /* The fourth argument register is available. */
21960 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
21966 /* Pop as many registers as we can. */
21967 thumb_pop (f
, regs_available_for_popping
);
21969 /* Process the registers we popped. */
21970 if (reg_containing_return_addr
== -1)
21972 /* The return address was popped into the lowest numbered register. */
21973 regs_to_pop
&= ~(1 << LR_REGNUM
);
21975 reg_containing_return_addr
=
21976 number_of_first_bit_set (regs_available_for_popping
);
21978 /* Remove this register for the mask of available registers, so that
21979 the return address will not be corrupted by further pops. */
21980 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
21983 /* If we popped other registers then handle them here. */
21984 if (regs_available_for_popping
)
21988 /* Work out which register currently contains the frame pointer. */
21989 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
21991 /* Move it into the correct place. */
21992 asm_fprintf (f
, "\tmov\t%r, %r\n",
21993 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
21995 /* (Temporarily) remove it from the mask of popped registers. */
21996 regs_available_for_popping
&= ~(1 << frame_pointer
);
21997 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
21999 if (regs_available_for_popping
)
22003 /* We popped the stack pointer as well,
22004 find the register that contains it. */
22005 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
22007 /* Move it into the stack register. */
22008 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
22010 /* At this point we have popped all necessary registers, so
22011 do not worry about restoring regs_available_for_popping
22012 to its correct value:
22014 assert (pops_needed == 0)
22015 assert (regs_available_for_popping == (1 << frame_pointer))
22016 assert (regs_to_pop == (1 << STACK_POINTER)) */
22020 /* Since we have just move the popped value into the frame
22021 pointer, the popping register is available for reuse, and
22022 we know that we still have the stack pointer left to pop. */
22023 regs_available_for_popping
|= (1 << frame_pointer
);
22027 /* If we still have registers left on the stack, but we no longer have
22028 any registers into which we can pop them, then we must move the return
22029 address into the link register and make available the register that
22031 if (regs_available_for_popping
== 0 && pops_needed
> 0)
22033 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
22035 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
22036 reg_containing_return_addr
);
22038 reg_containing_return_addr
= LR_REGNUM
;
22041 /* If we have registers left on the stack then pop some more.
22042 We know that at most we will want to pop FP and SP. */
22043 if (pops_needed
> 0)
22048 thumb_pop (f
, regs_available_for_popping
);
22050 /* We have popped either FP or SP.
22051 Move whichever one it is into the correct register. */
22052 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
22053 move_to
= number_of_first_bit_set (regs_to_pop
);
22055 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
22057 regs_to_pop
&= ~(1 << move_to
);
22062 /* If we still have not popped everything then we must have only
22063 had one register available to us and we are now popping the SP. */
22064 if (pops_needed
> 0)
22068 thumb_pop (f
, regs_available_for_popping
);
22070 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
22072 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
22074 assert (regs_to_pop == (1 << STACK_POINTER))
22075 assert (pops_needed == 1)
22079 /* If necessary restore the a4 register. */
22082 if (reg_containing_return_addr
!= LR_REGNUM
)
22084 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
22085 reg_containing_return_addr
= LR_REGNUM
;
22088 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
22091 if (crtl
->calls_eh_return
)
22092 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
22094 /* Return to caller. */
22095 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
22098 /* Scan INSN just before assembler is output for it.
22099 For Thumb-1, we track the status of the condition codes; this
22100 information is used in the cbranchsi4_insn pattern. */
22102 thumb1_final_prescan_insn (rtx insn
)
22104 if (flag_print_asm_name
)
22105 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
22106 INSN_ADDRESSES (INSN_UID (insn
)));
22107 /* Don't overwrite the previous setter when we get to a cbranch. */
22108 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
22110 enum attr_conds conds
;
22112 if (cfun
->machine
->thumb1_cc_insn
)
22114 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
22115 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
22118 conds
= get_attr_conds (insn
);
22119 if (conds
== CONDS_SET
)
22121 rtx set
= single_set (insn
);
22122 cfun
->machine
->thumb1_cc_insn
= insn
;
22123 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
22124 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
22125 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
22126 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
22128 rtx src1
= XEXP (SET_SRC (set
), 1);
22129 if (src1
== const0_rtx
)
22130 cfun
->machine
->thumb1_cc_mode
= CCmode
;
22133 else if (conds
!= CONDS_NOCOND
)
22134 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
22139 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
22141 unsigned HOST_WIDE_INT mask
= 0xff;
22144 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
22145 if (val
== 0) /* XXX */
22148 for (i
= 0; i
< 25; i
++)
22149 if ((val
& (mask
<< i
)) == val
)
22155 /* Returns nonzero if the current function contains,
22156 or might contain a far jump. */
22158 thumb_far_jump_used_p (void)
22162 /* This test is only important for leaf functions. */
22163 /* assert (!leaf_function_p ()); */
22165 /* If we have already decided that far jumps may be used,
22166 do not bother checking again, and always return true even if
22167 it turns out that they are not being used. Once we have made
22168 the decision that far jumps are present (and that hence the link
22169 register will be pushed onto the stack) we cannot go back on it. */
22170 if (cfun
->machine
->far_jump_used
)
22173 /* If this function is not being called from the prologue/epilogue
22174 generation code then it must be being called from the
22175 INITIAL_ELIMINATION_OFFSET macro. */
22176 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
22178 /* In this case we know that we are being asked about the elimination
22179 of the arg pointer register. If that register is not being used,
22180 then there are no arguments on the stack, and we do not have to
22181 worry that a far jump might force the prologue to push the link
22182 register, changing the stack offsets. In this case we can just
22183 return false, since the presence of far jumps in the function will
22184 not affect stack offsets.
22186 If the arg pointer is live (or if it was live, but has now been
22187 eliminated and so set to dead) then we do have to test to see if
22188 the function might contain a far jump. This test can lead to some
22189 false negatives, since before reload is completed, then length of
22190 branch instructions is not known, so gcc defaults to returning their
22191 longest length, which in turn sets the far jump attribute to true.
22193 A false negative will not result in bad code being generated, but it
22194 will result in a needless push and pop of the link register. We
22195 hope that this does not occur too often.
22197 If we need doubleword stack alignment this could affect the other
22198 elimination offsets so we can't risk getting it wrong. */
22199 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
22200 cfun
->machine
->arg_pointer_live
= 1;
22201 else if (!cfun
->machine
->arg_pointer_live
)
22205 /* Check to see if the function contains a branch
22206 insn with the far jump attribute set. */
22207 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22210 /* Ignore tablejump patterns. */
22211 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
22212 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
22213 && get_attr_far_jump (insn
) == FAR_JUMP_YES
22216 /* Record the fact that we have decided that
22217 the function does use far jumps. */
22218 cfun
->machine
->far_jump_used
= 1;
22226 /* Return nonzero if FUNC must be entered in ARM mode. */
22228 is_called_in_ARM_mode (tree func
)
22230 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
22232 /* Ignore the problem about functions whose address is taken. */
22233 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
22237 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
22243 /* Given the stack offsets and register mask in OFFSETS, decide how
22244 many additional registers to push instead of subtracting a constant
22245 from SP. For epilogues the principle is the same except we use pop.
22246 FOR_PROLOGUE indicates which we're generating. */
22248 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
22250 HOST_WIDE_INT amount
;
22251 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
22252 /* Extract a mask of the ones we can give to the Thumb's push/pop
22254 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
22255 /* Then count how many other high registers will need to be pushed. */
22256 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
22257 int n_free
, reg_base
, size
;
22259 if (!for_prologue
&& frame_pointer_needed
)
22260 amount
= offsets
->locals_base
- offsets
->saved_regs
;
22262 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
22264 /* If the stack frame size is 512 exactly, we can save one load
22265 instruction, which should make this a win even when optimizing
22267 if (!optimize_size
&& amount
!= 512)
22270 /* Can't do this if there are high registers to push. */
22271 if (high_regs_pushed
!= 0)
22274 /* Shouldn't do it in the prologue if no registers would normally
22275 be pushed at all. In the epilogue, also allow it if we'll have
22276 a pop insn for the PC. */
22279 || TARGET_BACKTRACE
22280 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
22281 || TARGET_INTERWORK
22282 || crtl
->args
.pretend_args_size
!= 0))
22285 /* Don't do this if thumb_expand_prologue wants to emit instructions
22286 between the push and the stack frame allocation. */
22288 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
22289 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
22296 size
= arm_size_return_regs ();
22297 reg_base
= ARM_NUM_INTS (size
);
22298 live_regs_mask
>>= reg_base
;
22301 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
22302 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
22304 live_regs_mask
>>= 1;
22310 gcc_assert (amount
/ 4 * 4 == amount
);
22312 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
22313 return (amount
- 508) / 4;
22314 if (amount
<= n_free
* 4)
22319 /* The bits which aren't usefully expanded as rtl. */
22321 thumb1_unexpanded_epilogue (void)
22323 arm_stack_offsets
*offsets
;
22325 unsigned long live_regs_mask
= 0;
22326 int high_regs_pushed
= 0;
22328 int had_to_push_lr
;
22331 if (cfun
->machine
->return_used_this_function
!= 0)
22334 if (IS_NAKED (arm_current_func_type ()))
22337 offsets
= arm_get_frame_offsets ();
22338 live_regs_mask
= offsets
->saved_regs_mask
;
22339 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
22341 /* If we can deduce the registers used from the function's return value.
22342 This is more reliable that examining df_regs_ever_live_p () because that
22343 will be set if the register is ever used in the function, not just if
22344 the register is used to hold a return value. */
22345 size
= arm_size_return_regs ();
22347 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
22350 unsigned long extra_mask
= (1 << extra_pop
) - 1;
22351 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
22354 /* The prolog may have pushed some high registers to use as
22355 work registers. e.g. the testsuite file:
22356 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
22357 compiles to produce:
22358 push {r4, r5, r6, r7, lr}
22362 as part of the prolog. We have to undo that pushing here. */
22364 if (high_regs_pushed
)
22366 unsigned long mask
= live_regs_mask
& 0xff;
22369 /* The available low registers depend on the size of the value we are
22377 /* Oh dear! We have no low registers into which we can pop
22380 ("no low registers available for popping high registers");
22382 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
22383 if (live_regs_mask
& (1 << next_hi_reg
))
22386 while (high_regs_pushed
)
22388 /* Find lo register(s) into which the high register(s) can
22390 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
22392 if (mask
& (1 << regno
))
22393 high_regs_pushed
--;
22394 if (high_regs_pushed
== 0)
22398 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
22400 /* Pop the values into the low register(s). */
22401 thumb_pop (asm_out_file
, mask
);
22403 /* Move the value(s) into the high registers. */
22404 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
22406 if (mask
& (1 << regno
))
22408 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
22411 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
22412 if (live_regs_mask
& (1 << next_hi_reg
))
22417 live_regs_mask
&= ~0x0f00;
22420 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
22421 live_regs_mask
&= 0xff;
22423 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
22425 /* Pop the return address into the PC. */
22426 if (had_to_push_lr
)
22427 live_regs_mask
|= 1 << PC_REGNUM
;
22429 /* Either no argument registers were pushed or a backtrace
22430 structure was created which includes an adjusted stack
22431 pointer, so just pop everything. */
22432 if (live_regs_mask
)
22433 thumb_pop (asm_out_file
, live_regs_mask
);
22435 /* We have either just popped the return address into the
22436 PC or it is was kept in LR for the entire function.
22437 Note that thumb_pop has already called thumb_exit if the
22438 PC was in the list. */
22439 if (!had_to_push_lr
)
22440 thumb_exit (asm_out_file
, LR_REGNUM
);
22444 /* Pop everything but the return address. */
22445 if (live_regs_mask
)
22446 thumb_pop (asm_out_file
, live_regs_mask
);
22448 if (had_to_push_lr
)
22452 /* We have no free low regs, so save one. */
22453 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
22457 /* Get the return address into a temporary register. */
22458 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
22462 /* Move the return address to lr. */
22463 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
22465 /* Restore the low register. */
22466 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
22471 regno
= LAST_ARG_REGNUM
;
22476 /* Remove the argument registers that were pushed onto the stack. */
22477 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
22478 SP_REGNUM
, SP_REGNUM
,
22479 crtl
->args
.pretend_args_size
);
22481 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.
   Allocate and zero-initialize a fresh per-function machine_function,
   setting the function type to "unknown" so it is computed lazily.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  /* The allocation above zeroed the structure; only store the
     sentinel explicitly when it is not zero.  */
  machine->func_type = ARM_FT_UNKNOWN;
#endif

  return machine;
}
22500 /* Return an RTX indicating where the return address to the
22501 calling function can be found. */
22503 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
22508 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
22511 /* Do anything needed before RTL is emitted for each function. */
22513 arm_init_expanders (void)
22515 /* Arrange to initialize and mark the machine per-function status. */
22516 init_machine_status
= arm_init_machine_status
;
22518 /* This is to stop the combine pass optimizing away the alignment
22519 adjustment of va_arg. */
22520 /* ??? It is claimed that this should not be necessary. */
22522 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
22526 /* Like arm_compute_initial_elimination offset. Simpler because there
22527 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22528 to point at the base of the local variables after static stack
22529 space for a function has been allocated. */
22532 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
22534 arm_stack_offsets
*offsets
;
22536 offsets
= arm_get_frame_offsets ();
22540 case ARG_POINTER_REGNUM
:
22543 case STACK_POINTER_REGNUM
:
22544 return offsets
->outgoing_args
- offsets
->saved_args
;
22546 case FRAME_POINTER_REGNUM
:
22547 return offsets
->soft_frame
- offsets
->saved_args
;
22549 case ARM_HARD_FRAME_POINTER_REGNUM
:
22550 return offsets
->saved_regs
- offsets
->saved_args
;
22552 case THUMB_HARD_FRAME_POINTER_REGNUM
:
22553 return offsets
->locals_base
- offsets
->saved_args
;
22556 gcc_unreachable ();
22560 case FRAME_POINTER_REGNUM
:
22563 case STACK_POINTER_REGNUM
:
22564 return offsets
->outgoing_args
- offsets
->soft_frame
;
22566 case ARM_HARD_FRAME_POINTER_REGNUM
:
22567 return offsets
->saved_regs
- offsets
->soft_frame
;
22569 case THUMB_HARD_FRAME_POINTER_REGNUM
:
22570 return offsets
->locals_base
- offsets
->soft_frame
;
22573 gcc_unreachable ();
22578 gcc_unreachable ();
22582 /* Generate the function's prologue. */
22585 thumb1_expand_prologue (void)
22589 HOST_WIDE_INT amount
;
22590 arm_stack_offsets
*offsets
;
22591 unsigned long func_type
;
22593 unsigned long live_regs_mask
;
22594 unsigned long l_mask
;
22595 unsigned high_regs_pushed
= 0;
22597 func_type
= arm_current_func_type ();
22599 /* Naked functions don't have prologues. */
22600 if (IS_NAKED (func_type
))
22603 if (IS_INTERRUPT (func_type
))
22605 error ("interrupt Service Routines cannot be coded in Thumb mode");
22609 if (is_called_in_ARM_mode (current_function_decl
))
22610 emit_insn (gen_prologue_thumb1_interwork ());
22612 offsets
= arm_get_frame_offsets ();
22613 live_regs_mask
= offsets
->saved_regs_mask
;
22615 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22616 l_mask
= live_regs_mask
& 0x40ff;
22617 /* Then count how many other high registers will need to be pushed. */
22618 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
22620 if (crtl
->args
.pretend_args_size
)
22622 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
22624 if (cfun
->machine
->uses_anonymous_args
)
22626 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
22627 unsigned long mask
;
22629 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
22630 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
22632 insn
= thumb1_emit_multi_reg_push (mask
, 0);
22636 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
22637 stack_pointer_rtx
, x
));
22639 RTX_FRAME_RELATED_P (insn
) = 1;
22642 if (TARGET_BACKTRACE
)
22644 HOST_WIDE_INT offset
= 0;
22645 unsigned work_register
;
22646 rtx work_reg
, x
, arm_hfp_rtx
;
22648 /* We have been asked to create a stack backtrace structure.
22649 The code looks like this:
22653 0 sub SP, #16 Reserve space for 4 registers.
22654 2 push {R7} Push low registers.
22655 4 add R7, SP, #20 Get the stack pointer before the push.
22656 6 str R7, [SP, #8] Store the stack pointer
22657 (before reserving the space).
22658 8 mov R7, PC Get hold of the start of this code + 12.
22659 10 str R7, [SP, #16] Store it.
22660 12 mov R7, FP Get hold of the current frame pointer.
22661 14 str R7, [SP, #4] Store it.
22662 16 mov R7, LR Get hold of the current return address.
22663 18 str R7, [SP, #12] Store it.
22664 20 add R7, SP, #16 Point at the start of the
22665 backtrace structure.
22666 22 mov FP, R7 Put this value into the frame pointer. */
22668 work_register
= thumb_find_work_register (live_regs_mask
);
22669 work_reg
= gen_rtx_REG (SImode
, work_register
);
22670 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
22672 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
22673 stack_pointer_rtx
, GEN_INT (-16)));
22674 RTX_FRAME_RELATED_P (insn
) = 1;
22678 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
22679 RTX_FRAME_RELATED_P (insn
) = 1;
22681 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
22684 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
22685 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
22687 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
22688 x
= gen_frame_mem (SImode
, x
);
22689 emit_move_insn (x
, work_reg
);
22691 /* Make sure that the instruction fetching the PC is in the right place
22692 to calculate "start of backtrace creation code + 12". */
22693 /* ??? The stores using the common WORK_REG ought to be enough to
22694 prevent the scheduler from doing anything weird. Failing that
22695 we could always move all of the following into an UNSPEC_VOLATILE. */
22698 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
22699 emit_move_insn (work_reg
, x
);
22701 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
22702 x
= gen_frame_mem (SImode
, x
);
22703 emit_move_insn (x
, work_reg
);
22705 emit_move_insn (work_reg
, arm_hfp_rtx
);
22707 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22708 x
= gen_frame_mem (SImode
, x
);
22709 emit_move_insn (x
, work_reg
);
22713 emit_move_insn (work_reg
, arm_hfp_rtx
);
22715 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22716 x
= gen_frame_mem (SImode
, x
);
22717 emit_move_insn (x
, work_reg
);
22719 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
22720 emit_move_insn (work_reg
, x
);
22722 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
22723 x
= gen_frame_mem (SImode
, x
);
22724 emit_move_insn (x
, work_reg
);
22727 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
22728 emit_move_insn (work_reg
, x
);
22730 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
22731 x
= gen_frame_mem (SImode
, x
);
22732 emit_move_insn (x
, work_reg
);
22734 x
= GEN_INT (offset
+ 12);
22735 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
22737 emit_move_insn (arm_hfp_rtx
, work_reg
);
22739 /* Optimization: If we are not pushing any low registers but we are going
22740 to push some high registers then delay our first push. This will just
22741 be a push of LR and we can combine it with the push of the first high
22743 else if ((l_mask
& 0xff) != 0
22744 || (high_regs_pushed
== 0 && l_mask
))
22746 unsigned long mask
= l_mask
;
22747 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
22748 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
22749 RTX_FRAME_RELATED_P (insn
) = 1;
22752 if (high_regs_pushed
)
22754 unsigned pushable_regs
;
22755 unsigned next_hi_reg
;
22756 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
22757 : crtl
->args
.info
.nregs
;
22758 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
22760 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
22761 if (live_regs_mask
& (1 << next_hi_reg
))
22764 /* Here we need to mask out registers used for passing arguments
22765 even if they can be pushed. This is to avoid using them to stash the high
22766 registers. Such kind of stash may clobber the use of arguments. */
22767 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
22769 if (pushable_regs
== 0)
22770 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
22772 while (high_regs_pushed
> 0)
22774 unsigned long real_regs_mask
= 0;
22776 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
22778 if (pushable_regs
& (1 << regno
))
22780 emit_move_insn (gen_rtx_REG (SImode
, regno
),
22781 gen_rtx_REG (SImode
, next_hi_reg
));
22783 high_regs_pushed
--;
22784 real_regs_mask
|= (1 << next_hi_reg
);
22786 if (high_regs_pushed
)
22788 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
22790 if (live_regs_mask
& (1 << next_hi_reg
))
22795 pushable_regs
&= ~((1 << regno
) - 1);
22801 /* If we had to find a work register and we have not yet
22802 saved the LR then add it to the list of regs to push. */
22803 if (l_mask
== (1 << LR_REGNUM
))
22805 pushable_regs
|= l_mask
;
22806 real_regs_mask
|= l_mask
;
22810 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
22811 RTX_FRAME_RELATED_P (insn
) = 1;
22815 /* Load the pic register before setting the frame pointer,
22816 so we can use r7 as a temporary work register. */
22817 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
22818 arm_load_pic_register (live_regs_mask
);
22820 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
22821 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
22822 stack_pointer_rtx
);
22824 if (flag_stack_usage_info
)
22825 current_function_static_stack_size
22826 = offsets
->outgoing_args
- offsets
->saved_args
;
22828 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
22829 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
22834 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
22835 GEN_INT (- amount
)));
22836 RTX_FRAME_RELATED_P (insn
) = 1;
22842 /* The stack decrement is too big for an immediate value in a single
22843 insn. In theory we could issue multiple subtracts, but after
22844 three of them it becomes more space efficient to place the full
22845 value in the constant pool and load into a register. (Also the
22846 ARM debugger really likes to see only one stack decrement per
22847 function). So instead we look for a scratch register into which
22848 we can load the decrement, and then we subtract this from the
22849 stack pointer. Unfortunately on the thumb the only available
22850 scratch registers are the argument registers, and we cannot use
22851 these as they may hold arguments to the function. Instead we
22852 attempt to locate a call preserved register which is used by this
22853 function. If we can find one, then we know that it will have
22854 been pushed at the start of the prologue and so we can corrupt
22856 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
22857 if (live_regs_mask
& (1 << regno
))
22860 gcc_assert(regno
<= LAST_LO_REGNUM
);
22862 reg
= gen_rtx_REG (SImode
, regno
);
22864 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
22866 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
22867 stack_pointer_rtx
, reg
));
22869 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
22870 plus_constant (Pmode
, stack_pointer_rtx
,
22872 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22873 RTX_FRAME_RELATED_P (insn
) = 1;
22877 if (frame_pointer_needed
)
22878 thumb_set_frame_pointer (offsets
);
22880 /* If we are profiling, make sure no instructions are scheduled before
22881 the call to mcount. Similarly if the user has requested no
22882 scheduling in the prolog. Similarly if we want non-call exceptions
22883 using the EABI unwinder, to prevent faulting instructions from being
22884 swapped with a stack adjustment. */
22885 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
22886 || (arm_except_unwind_info (&global_options
) == UI_TARGET
22887 && cfun
->can_throw_non_call_exceptions
))
22888 emit_insn (gen_blockage ());
22890 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
22891 if (live_regs_mask
& 0xff)
22892 cfun
->machine
->lr_save_eliminated
= 0;
22895 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
22896 POP instruction can be generated. LR should be replaced by PC. All
22897 the checks required are already done by USE_RETURN_INSN (). Hence,
22898 all we really need to check here is if single register is to be
22899 returned, or multiple register return. */
22901 thumb2_expand_return (void)
22904 unsigned long saved_regs_mask
;
22905 arm_stack_offsets
*offsets
;
22907 offsets
= arm_get_frame_offsets ();
22908 saved_regs_mask
= offsets
->saved_regs_mask
;
22910 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22911 if (saved_regs_mask
& (1 << i
))
22914 if (saved_regs_mask
)
22918 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
22919 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
22920 rtx addr
= gen_rtx_MEM (SImode
,
22921 gen_rtx_POST_INC (SImode
,
22922 stack_pointer_rtx
));
22923 set_mem_alias_set (addr
, get_frame_alias_set ());
22924 XVECEXP (par
, 0, 0) = ret_rtx
;
22925 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
22926 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
22927 emit_jump_insn (par
);
22931 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
22932 saved_regs_mask
|= (1 << PC_REGNUM
);
22933 arm_emit_multi_reg_pop (saved_regs_mask
);
22938 emit_jump_insn (simple_return_rtx
);
22943 thumb1_expand_epilogue (void)
22945 HOST_WIDE_INT amount
;
22946 arm_stack_offsets
*offsets
;
22949 /* Naked functions don't have prologues. */
22950 if (IS_NAKED (arm_current_func_type ()))
22953 offsets
= arm_get_frame_offsets ();
22954 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
22956 if (frame_pointer_needed
)
22958 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
22959 amount
= offsets
->locals_base
- offsets
->saved_regs
;
22961 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
22963 gcc_assert (amount
>= 0);
22966 emit_insn (gen_blockage ());
22969 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
22970 GEN_INT (amount
)));
22973 /* r3 is always free in the epilogue. */
22974 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
22976 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
22977 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
22981 /* Emit a USE (stack_pointer_rtx), so that
22982 the stack adjustment will not be deleted. */
22983 emit_insn (gen_force_register_use (stack_pointer_rtx
));
22985 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
22986 emit_insn (gen_blockage ());
22988 /* Emit a clobber for each insn that will be restored in the epilogue,
22989 so that flow2 will get register lifetimes correct. */
22990 for (regno
= 0; regno
< 13; regno
++)
22991 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
22992 emit_clobber (gen_rtx_REG (SImode
, regno
));
22994 if (! df_regs_ever_live_p (LR_REGNUM
))
22995 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
22998 /* Epilogue code for APCS frame. */
23000 arm_expand_epilogue_apcs_frame (bool really_return
)
23002 unsigned long func_type
;
23003 unsigned long saved_regs_mask
;
23006 int floats_from_frame
= 0;
23007 arm_stack_offsets
*offsets
;
23009 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
23010 func_type
= arm_current_func_type ();
23012 /* Get frame offsets for ARM. */
23013 offsets
= arm_get_frame_offsets ();
23014 saved_regs_mask
= offsets
->saved_regs_mask
;
23016 /* Find the offset of the floating-point save area in the frame. */
23017 floats_from_frame
= offsets
->saved_args
- offsets
->frame
;
23019 /* Compute how many core registers saved and how far away the floats are. */
23020 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
23021 if (saved_regs_mask
& (1 << i
))
23024 floats_from_frame
+= 4;
23027 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
23031 /* The offset is from IP_REGNUM. */
23032 int saved_size
= arm_get_vfp_saved_size ();
23033 if (saved_size
> 0)
23035 floats_from_frame
+= saved_size
;
23036 emit_insn (gen_addsi3 (gen_rtx_REG (SImode
, IP_REGNUM
),
23037 hard_frame_pointer_rtx
,
23038 GEN_INT (-floats_from_frame
)));
23041 /* Generate VFP register multi-pop. */
23042 start_reg
= FIRST_VFP_REGNUM
;
23044 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
23045 /* Look for a case where a reg does not need restoring. */
23046 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
23047 && (!df_regs_ever_live_p (i
+ 1)
23048 || call_used_regs
[i
+ 1]))
23050 if (start_reg
!= i
)
23051 arm_emit_vfp_multi_reg_pop (start_reg
,
23052 (i
- start_reg
) / 2,
23053 gen_rtx_REG (SImode
,
23058 /* Restore the remaining regs that we have discovered (or possibly
23059 even all of them, if the conditional in the for loop never
23061 if (start_reg
!= i
)
23062 arm_emit_vfp_multi_reg_pop (start_reg
,
23063 (i
- start_reg
) / 2,
23064 gen_rtx_REG (SImode
, IP_REGNUM
));
23069 /* The frame pointer is guaranteed to be non-double-word aligned, as
23070 it is set to double-word-aligned old_stack_pointer - 4. */
23072 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
23074 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
23075 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
23077 rtx addr
= gen_frame_mem (V2SImode
,
23078 plus_constant (Pmode
, hard_frame_pointer_rtx
,
23080 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
23081 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
23082 gen_rtx_REG (V2SImode
, i
),
23088 /* saved_regs_mask should contain IP which contains old stack pointer
23089 at the time of activation creation. Since SP and IP are adjacent registers,
23090 we can restore the value directly into SP. */
23091 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
23092 saved_regs_mask
&= ~(1 << IP_REGNUM
);
23093 saved_regs_mask
|= (1 << SP_REGNUM
);
23095 /* There are two registers left in saved_regs_mask - LR and PC. We
23096 only need to restore LR (the return address), but to
23097 save time we can load it directly into PC, unless we need a
23098 special function exit sequence, or we are not really returning. */
23100 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
23101 && !crtl
->calls_eh_return
)
23102 /* Delete LR from the register mask, so that LR on
23103 the stack is loaded into the PC in the register mask. */
23104 saved_regs_mask
&= ~(1 << LR_REGNUM
);
23106 saved_regs_mask
&= ~(1 << PC_REGNUM
);
23108 num_regs
= bit_count (saved_regs_mask
);
23109 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
23111 /* Unwind the stack to just below the saved registers. */
23112 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23113 hard_frame_pointer_rtx
,
23114 GEN_INT (- 4 * num_regs
)));
23117 arm_emit_multi_reg_pop (saved_regs_mask
);
23119 if (IS_INTERRUPT (func_type
))
23121 /* Interrupt handlers will have pushed the
23122 IP onto the stack, so restore it now. */
23124 rtx addr
= gen_rtx_MEM (SImode
,
23125 gen_rtx_POST_INC (SImode
,
23126 stack_pointer_rtx
));
23127 set_mem_alias_set (addr
, get_frame_alias_set ());
23128 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
23129 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
23130 gen_rtx_REG (SImode
, IP_REGNUM
),
23134 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
23137 if (crtl
->calls_eh_return
)
23138 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23140 GEN_INT (ARM_EH_STACKADJ_REGNUM
)));
23142 if (IS_STACKALIGN (func_type
))
23143 /* Restore the original stack pointer. Before prologue, the stack was
23144 realigned and the original stack pointer saved in r0. For details,
23145 see comment in arm_expand_prologue. */
23146 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
23148 emit_jump_insn (simple_return_rtx
);
23151 /* Generate RTL to represent ARM epilogue. Really_return is true if the
23152 function is not a sibcall. */
23154 arm_expand_epilogue (bool really_return
)
23156 unsigned long func_type
;
23157 unsigned long saved_regs_mask
;
23161 arm_stack_offsets
*offsets
;
23163 func_type
= arm_current_func_type ();
23165 /* Naked functions don't have epilogue. Hence, generate return pattern, and
23166 let output_return_instruction take care of instruction emition if any. */
23167 if (IS_NAKED (func_type
)
23168 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
23170 emit_jump_insn (simple_return_rtx
);
23174 /* If we are throwing an exception, then we really must be doing a
23175 return, so we can't tail-call. */
23176 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
23178 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23180 arm_expand_epilogue_apcs_frame (really_return
);
23184 /* Get frame offsets for ARM. */
23185 offsets
= arm_get_frame_offsets ();
23186 saved_regs_mask
= offsets
->saved_regs_mask
;
23187 num_regs
= bit_count (saved_regs_mask
);
23189 if (frame_pointer_needed
)
23191 /* Restore stack pointer if necessary. */
23194 /* In ARM mode, frame pointer points to first saved register.
23195 Restore stack pointer to last saved register. */
23196 amount
= offsets
->frame
- offsets
->saved_regs
;
23198 /* Force out any pending memory operations that reference stacked data
23199 before stack de-allocation occurs. */
23200 emit_insn (gen_blockage ());
23201 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23202 hard_frame_pointer_rtx
,
23203 GEN_INT (amount
)));
23205 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23207 emit_insn (gen_force_register_use (stack_pointer_rtx
));
23211 /* In Thumb-2 mode, the frame pointer points to the last saved
23213 amount
= offsets
->locals_base
- offsets
->saved_regs
;
23215 emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23216 hard_frame_pointer_rtx
,
23217 GEN_INT (amount
)));
23219 /* Force out any pending memory operations that reference stacked data
23220 before stack de-allocation occurs. */
23221 emit_insn (gen_blockage ());
23222 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
23223 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23225 emit_insn (gen_force_register_use (stack_pointer_rtx
));
23230 /* Pop off outgoing args and local frame to adjust stack pointer to
23231 last saved register. */
23232 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23235 /* Force out any pending memory operations that reference stacked data
23236 before stack de-allocation occurs. */
23237 emit_insn (gen_blockage ());
23238 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23240 GEN_INT (amount
)));
23241 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
23243 emit_insn (gen_force_register_use (stack_pointer_rtx
));
23247 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
23249 /* Generate VFP register multi-pop. */
23250 int end_reg
= LAST_VFP_REGNUM
+ 1;
23252 /* Scan the registers in reverse order. We need to match
23253 any groupings made in the prologue and generate matching
23254 vldm operations. The need to match groups is because,
23255 unlike pop, vldm can only do consecutive regs. */
23256 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
23257 /* Look for a case where a reg does not need restoring. */
23258 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
23259 && (!df_regs_ever_live_p (i
+ 1)
23260 || call_used_regs
[i
+ 1]))
23262 /* Restore the regs discovered so far (from reg+2 to
23264 if (end_reg
> i
+ 2)
23265 arm_emit_vfp_multi_reg_pop (i
+ 2,
23266 (end_reg
- (i
+ 2)) / 2,
23267 stack_pointer_rtx
);
23271 /* Restore the remaining regs that we have discovered (or possibly
23272 even all of them, if the conditional in the for loop never
23274 if (end_reg
> i
+ 2)
23275 arm_emit_vfp_multi_reg_pop (i
+ 2,
23276 (end_reg
- (i
+ 2)) / 2,
23277 stack_pointer_rtx
);
23281 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
23282 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
23285 rtx addr
= gen_rtx_MEM (V2SImode
,
23286 gen_rtx_POST_INC (SImode
,
23287 stack_pointer_rtx
));
23288 set_mem_alias_set (addr
, get_frame_alias_set ());
23289 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
23290 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
23291 gen_rtx_REG (V2SImode
, i
),
23295 if (saved_regs_mask
)
23298 bool return_in_pc
= false;
23300 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
23301 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
23302 && !IS_STACKALIGN (func_type
)
23304 && crtl
->args
.pretend_args_size
== 0
23305 && saved_regs_mask
& (1 << LR_REGNUM
)
23306 && !crtl
->calls_eh_return
)
23308 saved_regs_mask
&= ~(1 << LR_REGNUM
);
23309 saved_regs_mask
|= (1 << PC_REGNUM
);
23310 return_in_pc
= true;
23313 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
23315 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
23316 if (saved_regs_mask
& (1 << i
))
23318 rtx addr
= gen_rtx_MEM (SImode
,
23319 gen_rtx_POST_INC (SImode
,
23320 stack_pointer_rtx
));
23321 set_mem_alias_set (addr
, get_frame_alias_set ());
23323 if (i
== PC_REGNUM
)
23325 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
23326 XVECEXP (insn
, 0, 0) = ret_rtx
;
23327 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
23328 gen_rtx_REG (SImode
, i
),
23330 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
23331 insn
= emit_jump_insn (insn
);
23335 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
23337 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
23338 gen_rtx_REG (SImode
, i
),
23345 if (current_tune
->prefer_ldrd_strd
23346 && !optimize_function_for_size_p (cfun
))
23349 thumb2_emit_ldrd_pop (saved_regs_mask
);
23351 arm_emit_multi_reg_pop (saved_regs_mask
);
23354 arm_emit_multi_reg_pop (saved_regs_mask
);
23357 if (return_in_pc
== true)
23361 if (crtl
->args
.pretend_args_size
)
23362 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23364 GEN_INT (crtl
->args
.pretend_args_size
)));
23366 if (!really_return
)
23369 if (crtl
->calls_eh_return
)
23370 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23372 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
23374 if (IS_STACKALIGN (func_type
))
23375 /* Restore the original stack pointer. Before prologue, the stack was
23376 realigned and the original stack pointer saved in r0. For details,
23377 see comment in arm_expand_prologue. */
23378 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
23380 emit_jump_insn (simple_return_rtx
);
23383 /* Implementation of insn prologue_thumb1_interwork. This is the first
23384 "instruction" of a function called in ARM mode. Swap to thumb mode. */
23387 thumb1_output_interwork (void)
23390 FILE *f
= asm_out_file
;
23392 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
23393 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
23395 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
23397 /* Generate code sequence to switch us into Thumb mode. */
23398 /* The .code 32 directive has already been emitted by
23399 ASM_DECLARE_FUNCTION_NAME. */
23400 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
23401 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
23403 /* Generate a label, so that the debugger will notice the
23404 change in instruction sets. This label is also used by
23405 the assembler to bypass the ARM code when this function
23406 is called from a Thumb encoded function elsewhere in the
23407 same file. Hence the definition of STUB_NAME here must
23408 agree with the definition in gas/config/tc-arm.c. */
23410 #define STUB_NAME ".real_start_of"
23412 fprintf (f
, "\t.code\t16\n");
23414 if (arm_dllexport_name_p (name
))
23415 name
= arm_strip_name_encoding (name
);
23417 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
23418 fprintf (f
, "\t.thumb_func\n");
23419 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
23424 /* Handle the case of a double word load into a low register from
23425 a computed memory address. The computed address may involve a
23426 register which is overwritten by the load. */
23428 thumb_load_double_from_address (rtx
*operands
)
23436 gcc_assert (REG_P (operands
[0]));
23437 gcc_assert (MEM_P (operands
[1]));
23439 /* Get the memory address. */
23440 addr
= XEXP (operands
[1], 0);
23442 /* Work out how the memory address is computed. */
23443 switch (GET_CODE (addr
))
23446 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23448 if (REGNO (operands
[0]) == REGNO (addr
))
23450 output_asm_insn ("ldr\t%H0, %2", operands
);
23451 output_asm_insn ("ldr\t%0, %1", operands
);
23455 output_asm_insn ("ldr\t%0, %1", operands
);
23456 output_asm_insn ("ldr\t%H0, %2", operands
);
23461 /* Compute <address> + 4 for the high order load. */
23462 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23464 output_asm_insn ("ldr\t%0, %1", operands
);
23465 output_asm_insn ("ldr\t%H0, %2", operands
);
23469 arg1
= XEXP (addr
, 0);
23470 arg2
= XEXP (addr
, 1);
23472 if (CONSTANT_P (arg1
))
23473 base
= arg2
, offset
= arg1
;
23475 base
= arg1
, offset
= arg2
;
23477 gcc_assert (REG_P (base
));
23479 /* Catch the case of <address> = <reg> + <reg> */
23480 if (REG_P (offset
))
23482 int reg_offset
= REGNO (offset
);
23483 int reg_base
= REGNO (base
);
23484 int reg_dest
= REGNO (operands
[0]);
23486 /* Add the base and offset registers together into the
23487 higher destination register. */
23488 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
23489 reg_dest
+ 1, reg_base
, reg_offset
);
23491 /* Load the lower destination register from the address in
23492 the higher destination register. */
23493 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
23494 reg_dest
, reg_dest
+ 1);
23496 /* Load the higher destination register from its own address
23498 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
23499 reg_dest
+ 1, reg_dest
+ 1);
23503 /* Compute <address> + 4 for the high order load. */
23504 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23506 /* If the computed address is held in the low order register
23507 then load the high order register first, otherwise always
23508 load the low order register first. */
23509 if (REGNO (operands
[0]) == REGNO (base
))
23511 output_asm_insn ("ldr\t%H0, %2", operands
);
23512 output_asm_insn ("ldr\t%0, %1", operands
);
23516 output_asm_insn ("ldr\t%0, %1", operands
);
23517 output_asm_insn ("ldr\t%H0, %2", operands
);
23523 /* With no registers to worry about we can just load the value
23525 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23527 output_asm_insn ("ldr\t%H0, %2", operands
);
23528 output_asm_insn ("ldr\t%0, %1", operands
);
23532 gcc_unreachable ();
23539 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
23546 if (REGNO (operands
[4]) > REGNO (operands
[5]))
23549 operands
[4] = operands
[5];
23552 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
23553 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
23557 if (REGNO (operands
[4]) > REGNO (operands
[5]))
23560 operands
[4] = operands
[5];
23563 if (REGNO (operands
[5]) > REGNO (operands
[6]))
23566 operands
[5] = operands
[6];
23569 if (REGNO (operands
[4]) > REGNO (operands
[5]))
23572 operands
[4] = operands
[5];
23576 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
23577 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
23581 gcc_unreachable ();
23587 /* Output a call-via instruction for thumb state. */
23589 thumb_call_via_reg (rtx reg
)
23591 int regno
= REGNO (reg
);
23594 gcc_assert (regno
< LR_REGNUM
);
23596 /* If we are in the normal text section we can use a single instance
23597 per compilation unit. If we are doing function sections, then we need
23598 an entry per section, since we can't rely on reachability. */
23599 if (in_section
== text_section
)
23601 thumb_call_reg_needed
= 1;
23603 if (thumb_call_via_label
[regno
] == NULL
)
23604 thumb_call_via_label
[regno
] = gen_label_rtx ();
23605 labelp
= thumb_call_via_label
+ regno
;
23609 if (cfun
->machine
->call_via
[regno
] == NULL
)
23610 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
23611 labelp
= cfun
->machine
->call_via
+ regno
;
23614 output_asm_insn ("bl\t%a0", labelp
);
23618 /* Routines for generating rtl. */
23620 thumb_expand_movmemqi (rtx
*operands
)
23622 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
23623 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
23624 HOST_WIDE_INT len
= INTVAL (operands
[2]);
23625 HOST_WIDE_INT offset
= 0;
23629 emit_insn (gen_movmem12b (out
, in
, out
, in
));
23635 emit_insn (gen_movmem8b (out
, in
, out
, in
));
23641 rtx reg
= gen_reg_rtx (SImode
);
23642 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
23643 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
23650 rtx reg
= gen_reg_rtx (HImode
);
23651 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
23652 plus_constant (Pmode
, in
,
23654 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
23663 rtx reg
= gen_reg_rtx (QImode
);
23664 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
23665 plus_constant (Pmode
, in
,
23667 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
23674 thumb_reload_out_hi (rtx
*operands
)
23676 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
23679 /* Handle reading a half-word from memory during reload. */
23681 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
23683 gcc_unreachable ();
23686 /* Return the length of a function name prefix
23687 that starts with the character 'c'. */
23689 arm_get_strip_length (int c
)
23693 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  /* Each iteration removes one recognized prefix.  */
  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
23732 /* This function is used to emit an EABI tag and its associated value.
23733 We emit the numerical value of the tag in case the assembler does not
23734 support textual tags. (Eg gas prior to 2.20). If requested we include
23735 the tag name in a comment so that anyone reading the assembler output
23736 will know which tag is being set.
23738 This function is not static because arm-c.c needs it too. */
23741 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
23743 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
23744 if (flag_verbose_asm
|| flag_debug_asm
)
23745 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
23746 asm_fprintf (asm_out_file
, "\n");
23750 arm_file_start (void)
23754 if (TARGET_UNIFIED_ASM
)
23755 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
23759 const char *fpu_name
;
23760 if (arm_selected_arch
)
23761 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
23762 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
23763 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
23765 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
23767 if (TARGET_SOFT_FLOAT
)
23769 fpu_name
= "softvfp";
23773 fpu_name
= arm_fpu_desc
->name
;
23774 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
23776 if (TARGET_HARD_FLOAT
)
23777 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23778 if (TARGET_HARD_FLOAT_ABI
)
23779 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23782 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
23784 /* Some of these attributes only apply when the corresponding features
23785 are used. However we don't have any easy way of figuring this out.
23786 Conservatively record the setting that would have been used. */
23788 if (flag_rounding_math
)
23789 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23791 if (!flag_unsafe_math_optimizations
)
23793 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23794 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23796 if (flag_signaling_nans
)
23797 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23799 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23800 flag_finite_math_only
? 1 : 3);
23802 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23803 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23804 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23805 flag_short_enums
? 1 : 2);
23807 /* Tag_ABI_optimization_goals. */
23810 else if (optimize
>= 2)
23816 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
23818 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23821 if (arm_fp16_format
)
23822 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23823 (int) arm_fp16_format
);
23825 if (arm_lang_output_object_attributes_hook
)
23826 arm_lang_output_object_attributes_hook();
23829 default_file_start ();
23833 arm_file_end (void)
23837 if (NEED_INDICATE_EXEC_STACK
)
23838 /* Add .note.GNU-stack. */
23839 file_end_indicate_exec_stack ();
23841 if (! thumb_call_reg_needed
)
23844 switch_to_section (text_section
);
23845 asm_fprintf (asm_out_file
, "\t.code 16\n");
23846 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
23848 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
23850 rtx label
= thumb_call_via_label
[regno
];
23854 targetm
.asm_out
.internal_label (asm_out_file
, "L",
23855 CODE_LABEL_NUMBER (label
));
23856 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
23862 /* Symbols in the text segment can be accessed without indirecting via the
23863 constant pool; it may take an extra binary operation, but this is still
23864 faster than indirecting via memory. Don't do this when not optimizing,
23865 since we won't be calculating al of the offsets necessary to do this
23869 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
23871 if (optimize
> 0 && TREE_CONSTANT (decl
))
23872 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
23874 default_encode_section_info (decl
, rtl
, first
);
23876 #endif /* !ARM_PE */
23879 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
23881 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
23882 && !strcmp (prefix
, "L"))
23884 arm_ccfsm_state
= 0;
23885 arm_target_insn
= NULL
;
23887 default_internal_label (stream
, prefix
, labelno
);
23890 /* Output code to add DELTA to the first argument, and then jump
23891 to FUNCTION. Used for C++ multiple inheritance. */
23893 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
23894 HOST_WIDE_INT delta
,
23895 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
23898 static int thunk_label
= 0;
23901 int mi_delta
= delta
;
23902 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
23904 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
23907 mi_delta
= - mi_delta
;
23911 int labelno
= thunk_label
++;
23912 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
23913 /* Thunks are entered in arm mode when avaiable. */
23914 if (TARGET_THUMB1_ONLY
)
23916 /* push r3 so we can use it as a temporary. */
23917 /* TODO: Omit this save if r3 is not used. */
23918 fputs ("\tpush {r3}\n", file
);
23919 fputs ("\tldr\tr3, ", file
);
23923 fputs ("\tldr\tr12, ", file
);
23925 assemble_name (file
, label
);
23926 fputc ('\n', file
);
23929 /* If we are generating PIC, the ldr instruction below loads
23930 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23931 the address of the add + 8, so we have:
23933 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23936 Note that we have "+ 1" because some versions of GNU ld
23937 don't set the low bit of the result for R_ARM_REL32
23938 relocations against thumb function symbols.
23939 On ARMv6M this is +4, not +8. */
23940 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
23941 assemble_name (file
, labelpc
);
23942 fputs (":\n", file
);
23943 if (TARGET_THUMB1_ONLY
)
23945 /* This is 2 insns after the start of the thunk, so we know it
23946 is 4-byte aligned. */
23947 fputs ("\tadd\tr3, pc, r3\n", file
);
23948 fputs ("\tmov r12, r3\n", file
);
23951 fputs ("\tadd\tr12, pc, r12\n", file
);
23953 else if (TARGET_THUMB1_ONLY
)
23954 fputs ("\tmov r12, r3\n", file
);
23956 if (TARGET_THUMB1_ONLY
)
23958 if (mi_delta
> 255)
23960 fputs ("\tldr\tr3, ", file
);
23961 assemble_name (file
, label
);
23962 fputs ("+4\n", file
);
23963 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
23964 mi_op
, this_regno
, this_regno
);
23966 else if (mi_delta
!= 0)
23968 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
23969 mi_op
, this_regno
, this_regno
,
23975 /* TODO: Use movw/movt for large constants when available. */
23976 while (mi_delta
!= 0)
23978 if ((mi_delta
& (3 << shift
)) == 0)
23982 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
23983 mi_op
, this_regno
, this_regno
,
23984 mi_delta
& (0xff << shift
));
23985 mi_delta
&= ~(0xff << shift
);
23992 if (TARGET_THUMB1_ONLY
)
23993 fputs ("\tpop\t{r3}\n", file
);
23995 fprintf (file
, "\tbx\tr12\n");
23996 ASM_OUTPUT_ALIGN (file
, 2);
23997 assemble_name (file
, label
);
23998 fputs (":\n", file
);
24001 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
24002 rtx tem
= XEXP (DECL_RTL (function
), 0);
24003 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
24004 tem
= gen_rtx_MINUS (GET_MODE (tem
),
24006 gen_rtx_SYMBOL_REF (Pmode
,
24007 ggc_strdup (labelpc
)));
24008 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
24011 /* Output ".word .LTHUNKn". */
24012 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
24014 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
24015 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
24019 fputs ("\tb\t", file
);
24020 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
24021 if (NEED_PLT_RELOC
)
24022 fputs ("(PLT)", file
);
24023 fputc ('\n', file
);
24028 arm_emit_vector_const (FILE *file
, rtx x
)
24031 const char * pattern
;
24033 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
24035 switch (GET_MODE (x
))
24037 case V2SImode
: pattern
= "%08x"; break;
24038 case V4HImode
: pattern
= "%04x"; break;
24039 case V8QImode
: pattern
= "%02x"; break;
24040 default: gcc_unreachable ();
24043 fprintf (file
, "0x");
24044 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
24048 element
= CONST_VECTOR_ELT (x
, i
);
24049 fprintf (file
, pattern
, INTVAL (element
));
24055 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
24056 HFmode constant pool entries are actually loaded with ldr. */
24058 arm_emit_fp16_const (rtx c
)
24063 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
24064 bits
= real_to_target (NULL
, &r
, HFmode
);
24065 if (WORDS_BIG_ENDIAN
)
24066 assemble_zeros (2);
24067 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
24068 if (!WORDS_BIG_ENDIAN
)
24069 assemble_zeros (2);
24073 arm_output_load_gr (rtx
*operands
)
24080 if (!MEM_P (operands
[1])
24081 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
24082 || !REG_P (reg
= XEXP (sum
, 0))
24083 || !CONST_INT_P (offset
= XEXP (sum
, 1))
24084 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
24085 return "wldrw%?\t%0, %1";
24087 /* Fix up an out-of-range load of a GR register. */
24088 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
24089 wcgr
= operands
[0];
24091 output_asm_insn ("ldr%?\t%0, %1", operands
);
24093 operands
[0] = wcgr
;
24095 output_asm_insn ("tmcr%?\t%0, %1", operands
);
24096 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
24101 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
24103 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
24104 named arg and all anonymous args onto the stack.
24105 XXX I know the prologue shouldn't be pushing registers, but it is faster
24109 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
24110 enum machine_mode mode
,
24113 int second_time ATTRIBUTE_UNUSED
)
24115 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
24118 cfun
->machine
->uses_anonymous_args
= 1;
24119 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
24121 nregs
= pcum
->aapcs_ncrn
;
24122 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
24126 nregs
= pcum
->nregs
;
24128 if (nregs
< NUM_ARG_REGS
)
24129 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
24132 /* Return nonzero if the CONSUMER instruction (a store) does not need
24133 PRODUCER's value to calculate the address. */
24136 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
24138 rtx value
= PATTERN (producer
);
24139 rtx addr
= PATTERN (consumer
);
24141 if (GET_CODE (value
) == COND_EXEC
)
24142 value
= COND_EXEC_CODE (value
);
24143 if (GET_CODE (value
) == PARALLEL
)
24144 value
= XVECEXP (value
, 0, 0);
24145 value
= XEXP (value
, 0);
24146 if (GET_CODE (addr
) == COND_EXEC
)
24147 addr
= COND_EXEC_CODE (addr
);
24148 if (GET_CODE (addr
) == PARALLEL
)
24149 addr
= XVECEXP (addr
, 0, 0);
24150 addr
= XEXP (addr
, 0);
24152 return !reg_overlap_mentioned_p (value
, addr
);
24155 /* Return nonzero if the CONSUMER instruction (a store) does need
24156 PRODUCER's value to calculate the address. */
24159 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
24161 return !arm_no_early_store_addr_dep (producer
, consumer
);
24164 /* Return nonzero if the CONSUMER instruction (a load) does need
24165 PRODUCER's value to calculate the address. */
24168 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
24170 rtx value
= PATTERN (producer
);
24171 rtx addr
= PATTERN (consumer
);
24173 if (GET_CODE (value
) == COND_EXEC
)
24174 value
= COND_EXEC_CODE (value
);
24175 if (GET_CODE (value
) == PARALLEL
)
24176 value
= XVECEXP (value
, 0, 0);
24177 value
= XEXP (value
, 0);
24178 if (GET_CODE (addr
) == COND_EXEC
)
24179 addr
= COND_EXEC_CODE (addr
);
24180 if (GET_CODE (addr
) == PARALLEL
)
24182 if (GET_CODE (XVECEXP (addr
, 0, 0)) == RETURN
)
24183 addr
= XVECEXP (addr
, 0, 1);
24185 addr
= XVECEXP (addr
, 0, 0);
24187 addr
= XEXP (addr
, 1);
24189 return reg_overlap_mentioned_p (value
, addr
);
24192 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24193 have an early register shift value or amount dependency on the
24194 result of PRODUCER. */
24197 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
24199 rtx value
= PATTERN (producer
);
24200 rtx op
= PATTERN (consumer
);
24203 if (GET_CODE (value
) == COND_EXEC
)
24204 value
= COND_EXEC_CODE (value
);
24205 if (GET_CODE (value
) == PARALLEL
)
24206 value
= XVECEXP (value
, 0, 0);
24207 value
= XEXP (value
, 0);
24208 if (GET_CODE (op
) == COND_EXEC
)
24209 op
= COND_EXEC_CODE (op
);
24210 if (GET_CODE (op
) == PARALLEL
)
24211 op
= XVECEXP (op
, 0, 0);
24214 early_op
= XEXP (op
, 0);
24215 /* This is either an actual independent shift, or a shift applied to
24216 the first operand of another operation. We want the whole shift
24218 if (REG_P (early_op
))
24221 return !reg_overlap_mentioned_p (value
, early_op
);
24224 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24225 have an early register shift value dependency on the result of
24229 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
24231 rtx value
= PATTERN (producer
);
24232 rtx op
= PATTERN (consumer
);
24235 if (GET_CODE (value
) == COND_EXEC
)
24236 value
= COND_EXEC_CODE (value
);
24237 if (GET_CODE (value
) == PARALLEL
)
24238 value
= XVECEXP (value
, 0, 0);
24239 value
= XEXP (value
, 0);
24240 if (GET_CODE (op
) == COND_EXEC
)
24241 op
= COND_EXEC_CODE (op
);
24242 if (GET_CODE (op
) == PARALLEL
)
24243 op
= XVECEXP (op
, 0, 0);
24246 early_op
= XEXP (op
, 0);
24248 /* This is either an actual independent shift, or a shift applied to
24249 the first operand of another operation. We want the value being
24250 shifted, in either case. */
24251 if (!REG_P (early_op
))
24252 early_op
= XEXP (early_op
, 0);
24254 return !reg_overlap_mentioned_p (value
, early_op
);
24257 /* Return nonzero if the CONSUMER (a mul or mac op) does not
24258 have an early register mult dependency on the result of
24262 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
24264 rtx value
= PATTERN (producer
);
24265 rtx op
= PATTERN (consumer
);
24267 if (GET_CODE (value
) == COND_EXEC
)
24268 value
= COND_EXEC_CODE (value
);
24269 if (GET_CODE (value
) == PARALLEL
)
24270 value
= XVECEXP (value
, 0, 0);
24271 value
= XEXP (value
, 0);
24272 if (GET_CODE (op
) == COND_EXEC
)
24273 op
= COND_EXEC_CODE (op
);
24274 if (GET_CODE (op
) == PARALLEL
)
24275 op
= XVECEXP (op
, 0, 0);
24278 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
24280 if (GET_CODE (XEXP (op
, 0)) == MULT
)
24281 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
24283 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
24289 /* We can't rely on the caller doing the proper promotion when
24290 using APCS or ATPCS. */
24293 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
24295 return !TARGET_AAPCS_BASED
;
24298 static enum machine_mode
24299 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
24300 enum machine_mode mode
,
24301 int *punsignedp ATTRIBUTE_UNUSED
,
24302 const_tree fntype ATTRIBUTE_UNUSED
,
24303 int for_return ATTRIBUTE_UNUSED
)
24305 if (GET_MODE_CLASS (mode
) == MODE_INT
24306 && GET_MODE_SIZE (mode
) < 4)
24312 /* AAPCS based ABIs use short enums by default. */
24315 arm_default_short_enums (void)
24317 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
24321 /* AAPCS requires that anonymous bitfields affect structure alignment. */
24324 arm_align_anon_bitfield (void)
24326 return TARGET_AAPCS_BASED
;
24330 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
24333 arm_cxx_guard_type (void)
24335 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
24338 /* Return non-zero if the consumer (a multiply-accumulate instruction)
24339 has an accumulator dependency on the result of the producer (a
24340 multiplication instruction) and no other dependency on that result. */
24342 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
24344 rtx mul
= PATTERN (producer
);
24345 rtx mac
= PATTERN (consumer
);
24347 rtx mac_op0
, mac_op1
, mac_acc
;
24349 if (GET_CODE (mul
) == COND_EXEC
)
24350 mul
= COND_EXEC_CODE (mul
);
24351 if (GET_CODE (mac
) == COND_EXEC
)
24352 mac
= COND_EXEC_CODE (mac
);
24354 /* Check that mul is of the form (set (...) (mult ...))
24355 and mla is of the form (set (...) (plus (mult ...) (...))). */
24356 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
24357 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
24358 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
24361 mul_result
= XEXP (mul
, 0);
24362 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
24363 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
24364 mac_acc
= XEXP (XEXP (mac
, 1), 1);
24366 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
24367 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
24368 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
24372 /* The EABI says test the least significant bit of a guard variable. */
24375 arm_cxx_guard_mask_bit (void)
24377 return TARGET_AAPCS_BASED
;
24381 /* The EABI specifies that all array cookies are 8 bytes long. */
24384 arm_get_cookie_size (tree type
)
24388 if (!TARGET_AAPCS_BASED
)
24389 return default_cxx_get_cookie_size (type
);
24391 size
= build_int_cst (sizetype
, 8);
24396 /* The EABI says that array cookies should also contain the element size. */
24399 arm_cookie_has_size (void)
24401 return TARGET_AAPCS_BASED
;
24405 /* The EABI says constructors and destructors should return a pointer to
24406 the object constructed/destroyed. */
24409 arm_cxx_cdtor_returns_this (void)
24411 return TARGET_AAPCS_BASED
;
24414 /* The EABI says that an inline function may never be the key
24418 arm_cxx_key_method_may_be_inline (void)
24420 return !TARGET_AAPCS_BASED
;
24424 arm_cxx_determine_class_data_visibility (tree decl
)
24426 if (!TARGET_AAPCS_BASED
24427 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
24430 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24431 is exported. However, on systems without dynamic vague linkage,
24432 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24433 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
24434 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
24436 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
24437 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
24441 arm_cxx_class_data_always_comdat (void)
24443 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24444 vague linkage if the class has no key function. */
24445 return !TARGET_AAPCS_BASED
;
24449 /* The EABI says __aeabi_atexit should be used to register static
24453 arm_cxx_use_aeabi_atexit (void)
24455 return TARGET_AAPCS_BASED
;
24460 arm_set_return_address (rtx source
, rtx scratch
)
24462 arm_stack_offsets
*offsets
;
24463 HOST_WIDE_INT delta
;
24465 unsigned long saved_regs
;
24467 offsets
= arm_get_frame_offsets ();
24468 saved_regs
= offsets
->saved_regs_mask
;
24470 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
24471 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
24474 if (frame_pointer_needed
)
24475 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
24478 /* LR will be the first saved register. */
24479 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
24484 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
24485 GEN_INT (delta
& ~4095)));
24490 addr
= stack_pointer_rtx
;
24492 addr
= plus_constant (Pmode
, addr
, delta
);
24494 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
24500 thumb_set_return_address (rtx source
, rtx scratch
)
24502 arm_stack_offsets
*offsets
;
24503 HOST_WIDE_INT delta
;
24504 HOST_WIDE_INT limit
;
24507 unsigned long mask
;
24511 offsets
= arm_get_frame_offsets ();
24512 mask
= offsets
->saved_regs_mask
;
24513 if (mask
& (1 << LR_REGNUM
))
24516 /* Find the saved regs. */
24517 if (frame_pointer_needed
)
24519 delta
= offsets
->soft_frame
- offsets
->saved_args
;
24520 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
24526 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
24529 /* Allow for the stack frame. */
24530 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
24532 /* The link register is always the first saved register. */
24535 /* Construct the address. */
24536 addr
= gen_rtx_REG (SImode
, reg
);
24539 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
24540 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
24544 addr
= plus_constant (Pmode
, addr
, delta
);
24546 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
24549 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
24552 /* Implements target hook vector_mode_supported_p. */
24554 arm_vector_mode_supported_p (enum machine_mode mode
)
24556 /* Neon also supports V2SImode, etc. listed in the clause below. */
24557 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
24558 || mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
24561 if ((TARGET_NEON
|| TARGET_IWMMXT
)
24562 && ((mode
== V2SImode
)
24563 || (mode
== V4HImode
)
24564 || (mode
== V8QImode
)))
24567 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
24568 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
24569 || mode
== V2HAmode
))
24575 /* Implements target hook array_mode_supported_p. */
24578 arm_array_mode_supported_p (enum machine_mode mode
,
24579 unsigned HOST_WIDE_INT nelems
)
24582 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
24583 && (nelems
>= 2 && nelems
<= 4))
24589 /* Use the option -mvectorize-with-neon-double to override the use of quardword
24590 registers when autovectorizing for Neon, at least until multiple vector
24591 widths are supported properly by the middle-end. */
24593 static enum machine_mode
24594 arm_preferred_simd_mode (enum machine_mode mode
)
24600 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
24602 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
24604 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
24606 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
24608 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
24615 if (TARGET_REALLY_IWMMXT
)
24631 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24633 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24634 using r0-r4 for function arguments, r7 for the stack frame and don't have
24635 enough left over to do doubleword arithmetic. For Thumb-2 all the
24636 potentially problematic instructions accept high registers so this is not
24637 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24638 that require many low registers. */
24640 arm_class_likely_spilled_p (reg_class_t rclass
)
24642 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
24643 || rclass
== CC_REG
)
24649 /* Implements target hook small_register_classes_for_mode_p. */
24651 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
24653 return TARGET_THUMB1
;
24656 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24657 ARM insns and therefore guarantee that the shift count is modulo 256.
24658 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24659 guarantee no particular behavior for out-of-range counts. */
24661 static unsigned HOST_WIDE_INT
24662 arm_shift_truncation_mask (enum machine_mode mode
)
24664 return mode
== SImode
? 255 : 0;
24668 /* Map internal gcc register numbers to DWARF2 register numbers. */
24671 arm_dbx_register_number (unsigned int regno
)
24676 if (IS_VFP_REGNUM (regno
))
24678 /* See comment in arm_dwarf_register_span. */
24679 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
24680 return 64 + regno
- FIRST_VFP_REGNUM
;
24682 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
24685 if (IS_IWMMXT_GR_REGNUM (regno
))
24686 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
24688 if (IS_IWMMXT_REGNUM (regno
))
24689 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
24691 gcc_unreachable ();
24694 /* Dwarf models VFPv3 registers as 32 64-bit registers.
24695 GCC models tham as 64 32-bit registers, so we need to describe this to
24696 the DWARF generation code. Other registers can use the default. */
24698 arm_dwarf_register_span (rtx rtl
)
24705 regno
= REGNO (rtl
);
24706 if (!IS_VFP_REGNUM (regno
))
24709 /* XXX FIXME: The EABI defines two VFP register ranges:
24710 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24712 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24713 corresponding D register. Until GDB supports this, we shall use the
24714 legacy encodings. We also use these encodings for D0-D15 for
24715 compatibility with older debuggers. */
24716 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
24719 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
24720 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
24721 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
24722 for (i
= 0; i
< nregs
; i
++)
24723 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
24728 #if ARM_UNWIND_INFO
24729 /* Emit unwind directives for a store-multiple instruction or stack pointer
24730 push during alignment.
24731 These should only ever be generated by the function prologue code, so
24732 expect them to have a particular form. */
24735 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
24738 HOST_WIDE_INT offset
;
24739 HOST_WIDE_INT nregs
;
24745 e
= XVECEXP (p
, 0, 0);
24746 if (GET_CODE (e
) != SET
)
24749 /* First insn will adjust the stack pointer. */
24750 if (GET_CODE (e
) != SET
24751 || !REG_P (XEXP (e
, 0))
24752 || REGNO (XEXP (e
, 0)) != SP_REGNUM
24753 || GET_CODE (XEXP (e
, 1)) != PLUS
)
24756 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
24757 nregs
= XVECLEN (p
, 0) - 1;
24759 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
24762 /* The function prologue may also push pc, but not annotate it as it is
24763 never restored. We turn this into a stack pointer adjustment. */
24764 if (nregs
* 4 == offset
- 4)
24766 fprintf (asm_out_file
, "\t.pad #4\n");
24770 fprintf (asm_out_file
, "\t.save {");
24772 else if (IS_VFP_REGNUM (reg
))
24775 fprintf (asm_out_file
, "\t.vsave {");
24778 /* Unknown register type. */
24781 /* If the stack increment doesn't match the size of the saved registers,
24782 something has gone horribly wrong. */
24783 if (offset
!= nregs
* reg_size
)
24788 /* The remaining insns will describe the stores. */
24789 for (i
= 1; i
<= nregs
; i
++)
24791 /* Expect (set (mem <addr>) (reg)).
24792 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24793 e
= XVECEXP (p
, 0, i
);
24794 if (GET_CODE (e
) != SET
24795 || !MEM_P (XEXP (e
, 0))
24796 || !REG_P (XEXP (e
, 1)))
24799 reg
= REGNO (XEXP (e
, 1));
24804 fprintf (asm_out_file
, ", ");
24805 /* We can't use %r for vfp because we need to use the
24806 double precision register names. */
24807 if (IS_VFP_REGNUM (reg
))
24808 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
24810 asm_fprintf (asm_out_file
, "%r", reg
);
24812 #ifdef ENABLE_CHECKING
24813 /* Check that the addresses are consecutive. */
24814 e
= XEXP (XEXP (e
, 0), 0);
24815 if (GET_CODE (e
) == PLUS
)
24817 offset
+= reg_size
;
24818 if (!REG_P (XEXP (e
, 0))
24819 || REGNO (XEXP (e
, 0)) != SP_REGNUM
24820 || !CONST_INT_P (XEXP (e
, 1))
24821 || offset
!= INTVAL (XEXP (e
, 1)))
24826 || REGNO (e
) != SP_REGNUM
)
24830 fprintf (asm_out_file
, "}\n");
24833 /* Emit unwind directives for a SET. */
24836 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
24844 switch (GET_CODE (e0
))
24847 /* Pushing a single register. */
24848 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
24849 || !REG_P (XEXP (XEXP (e0
, 0), 0))
24850 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
24853 asm_fprintf (asm_out_file
, "\t.save ");
24854 if (IS_VFP_REGNUM (REGNO (e1
)))
24855 asm_fprintf(asm_out_file
, "{d%d}\n",
24856 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
24858 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
24862 if (REGNO (e0
) == SP_REGNUM
)
24864 /* A stack increment. */
24865 if (GET_CODE (e1
) != PLUS
24866 || !REG_P (XEXP (e1
, 0))
24867 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
24868 || !CONST_INT_P (XEXP (e1
, 1)))
24871 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
24872 -INTVAL (XEXP (e1
, 1)));
24874 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
24876 HOST_WIDE_INT offset
;
24878 if (GET_CODE (e1
) == PLUS
)
24880 if (!REG_P (XEXP (e1
, 0))
24881 || !CONST_INT_P (XEXP (e1
, 1)))
24883 reg
= REGNO (XEXP (e1
, 0));
24884 offset
= INTVAL (XEXP (e1
, 1));
24885 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
24886 HARD_FRAME_POINTER_REGNUM
, reg
,
24889 else if (REG_P (e1
))
24892 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
24893 HARD_FRAME_POINTER_REGNUM
, reg
);
24898 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
24900 /* Move from sp to reg. */
24901 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
24903 else if (GET_CODE (e1
) == PLUS
24904 && REG_P (XEXP (e1
, 0))
24905 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
24906 && CONST_INT_P (XEXP (e1
, 1)))
24908 /* Set reg to offset from sp. */
24909 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
24910 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
24922 /* Emit unwind directives for the given insn. */
24925 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
24928 bool handled_one
= false;
24930 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
24933 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
24934 && (TREE_NOTHROW (current_function_decl
)
24935 || crtl
->all_throwers_are_sibcalls
))
24938 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
24941 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
24943 pat
= XEXP (note
, 0);
24944 switch (REG_NOTE_KIND (note
))
24946 case REG_FRAME_RELATED_EXPR
:
24949 case REG_CFA_REGISTER
:
24952 pat
= PATTERN (insn
);
24953 if (GET_CODE (pat
) == PARALLEL
)
24954 pat
= XVECEXP (pat
, 0, 0);
24957 /* Only emitted for IS_STACKALIGN re-alignment. */
24962 src
= SET_SRC (pat
);
24963 dest
= SET_DEST (pat
);
24965 gcc_assert (src
== stack_pointer_rtx
);
24966 reg
= REGNO (dest
);
24967 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24970 handled_one
= true;
24973 case REG_CFA_DEF_CFA
:
24974 case REG_CFA_EXPRESSION
:
24975 case REG_CFA_ADJUST_CFA
:
24976 case REG_CFA_OFFSET
:
24977 /* ??? Only handling here what we actually emit. */
24978 gcc_unreachable ();
24986 pat
= PATTERN (insn
);
24989 switch (GET_CODE (pat
))
24992 arm_unwind_emit_set (asm_out_file
, pat
);
24996 /* Store multiple. */
24997 arm_unwind_emit_sequence (asm_out_file
, pat
);
25006 /* Output a reference from a function exception table to the type_info
25007 object X. The EABI specifies that the symbol should be relocated by
25008 an R_ARM_TARGET2 relocation. */
25011 arm_output_ttype (rtx x
)
25013 fputs ("\t.word\t", asm_out_file
);
25014 output_addr_const (asm_out_file
, x
);
25015 /* Use special relocations for symbol references. */
25016 if (!CONST_INT_P (x
))
25017 fputs ("(TARGET2)", asm_out_file
);
25018 fputc ('\n', asm_out_file
);
25023 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
25026 arm_asm_emit_except_personality (rtx personality
)
25028 fputs ("\t.personality\t", asm_out_file
);
25029 output_addr_const (asm_out_file
, personality
);
25030 fputc ('\n', asm_out_file
);
25033 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
25036 arm_asm_init_sections (void)
25038 exception_section
= get_unnamed_section (0, output_section_asm_op
,
25041 #endif /* ARM_UNWIND_INFO */
25043 /* Output unwind directives for the start/end of a function. */
25046 arm_output_fn_unwind (FILE * f
, bool prologue
)
25048 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
25052 fputs ("\t.fnstart\n", f
);
25055 /* If this function will never be unwound, then mark it as such.
25056 The came condition is used in arm_unwind_emit to suppress
25057 the frame annotations. */
25058 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
25059 && (TREE_NOTHROW (current_function_decl
)
25060 || crtl
->all_throwers_are_sibcalls
))
25061 fputs("\t.cantunwind\n", f
);
25063 fputs ("\t.fnend\n", f
);
25068 arm_emit_tls_decoration (FILE *fp
, rtx x
)
25070 enum tls_reloc reloc
;
25073 val
= XVECEXP (x
, 0, 0);
25074 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
25076 output_addr_const (fp
, val
);
25081 fputs ("(tlsgd)", fp
);
25084 fputs ("(tlsldm)", fp
);
25087 fputs ("(tlsldo)", fp
);
25090 fputs ("(gottpoff)", fp
);
25093 fputs ("(tpoff)", fp
);
25096 fputs ("(tlsdesc)", fp
);
25099 gcc_unreachable ();
25108 fputs (" + (. - ", fp
);
25109 output_addr_const (fp
, XVECEXP (x
, 0, 2));
25110 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
25111 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
25112 output_addr_const (fp
, XVECEXP (x
, 0, 3));
25122 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
25125 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
25127 gcc_assert (size
== 4);
25128 fputs ("\t.word\t", file
);
25129 output_addr_const (file
, x
);
25130 fputs ("(tlsldo)", file
);
25133 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
25136 arm_output_addr_const_extra (FILE *fp
, rtx x
)
25138 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
25139 return arm_emit_tls_decoration (fp
, x
);
25140 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
25143 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
25145 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
25146 assemble_name_raw (fp
, label
);
25150 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
25152 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
25156 output_addr_const (fp
, XVECEXP (x
, 0, 0));
25160 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
25162 output_addr_const (fp
, XVECEXP (x
, 0, 0));
25166 output_addr_const (fp
, XVECEXP (x
, 0, 1));
25170 else if (GET_CODE (x
) == CONST_VECTOR
)
25171 return arm_emit_vector_const (fp
, x
);
25176 /* Output assembly for a shift instruction.
25177 SET_FLAGS determines how the instruction modifies the condition codes.
25178 0 - Do not set condition codes.
25179 1 - Set condition codes.
25180 2 - Use smallest instruction. */
25182 arm_output_shift(rtx
* operands
, int set_flags
)
25185 static const char flag_chars
[3] = {'?', '.', '!'};
25190 c
= flag_chars
[set_flags
];
25191 if (TARGET_UNIFIED_ASM
)
25193 shift
= shift_op(operands
[3], &val
);
25197 operands
[2] = GEN_INT(val
);
25198 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
25201 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
25204 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
25205 output_asm_insn (pattern
, operands
);
25209 /* Output assembly for a WMMX immediate shift instruction. */
25211 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
25213 int shift
= INTVAL (operands
[2]);
25215 enum machine_mode opmode
= GET_MODE (operands
[0]);
25217 gcc_assert (shift
>= 0);
25219 /* If the shift value in the register versions is > 63 (for D qualifier),
25220 31 (for W qualifier) or 15 (for H qualifier). */
25221 if (((opmode
== V4HImode
) && (shift
> 15))
25222 || ((opmode
== V2SImode
) && (shift
> 31))
25223 || ((opmode
== DImode
) && (shift
> 63)))
25227 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
25228 output_asm_insn (templ
, operands
);
25229 if (opmode
== DImode
)
25231 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
25232 output_asm_insn (templ
, operands
);
25237 /* The destination register will contain all zeros. */
25238 sprintf (templ
, "wzero\t%%0");
25239 output_asm_insn (templ
, operands
);
25244 if ((opmode
== DImode
) && (shift
> 32))
25246 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
25247 output_asm_insn (templ
, operands
);
25248 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
25249 output_asm_insn (templ
, operands
);
25253 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
25254 output_asm_insn (templ
, operands
);
25259 /* Output assembly for a WMMX tinsr instruction. */
25261 arm_output_iwmmxt_tinsr (rtx
*operands
)
25263 int mask
= INTVAL (operands
[3]);
25266 int units
= mode_nunits
[GET_MODE (operands
[0])];
25267 gcc_assert ((mask
& (mask
- 1)) == 0);
25268 for (i
= 0; i
< units
; ++i
)
25270 if ((mask
& 0x01) == 1)
25276 gcc_assert (i
< units
);
25278 switch (GET_MODE (operands
[0]))
25281 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
25284 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
25287 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
25290 gcc_unreachable ();
25293 output_asm_insn (templ
, operands
);
25298 /* Output a Thumb-1 casesi dispatch sequence. */
25300 thumb1_output_casesi (rtx
*operands
)
25302 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
25304 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
25306 switch (GET_MODE(diff_vec
))
25309 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
25310 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
25312 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
25313 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
25315 return "bl\t%___gnu_thumb1_case_si";
25317 gcc_unreachable ();
25321 /* Output a Thumb-2 casesi instruction. */
25323 thumb2_output_casesi (rtx
*operands
)
25325 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
25327 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
25329 output_asm_insn ("cmp\t%0, %1", operands
);
25330 output_asm_insn ("bhi\t%l3", operands
);
25331 switch (GET_MODE(diff_vec
))
25334 return "tbb\t[%|pc, %0]";
25336 return "tbh\t[%|pc, %0, lsl #1]";
25340 output_asm_insn ("adr\t%4, %l2", operands
);
25341 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
25342 output_asm_insn ("add\t%4, %4, %5", operands
);
25347 output_asm_insn ("adr\t%4, %l2", operands
);
25348 return "ldr\t%|pc, [%4, %0, lsl #2]";
25351 gcc_unreachable ();
25355 /* Most ARM cores are single issue, but some newer ones can dual issue.
25356 The scheduler descriptions rely on this being correct. */
25358 arm_issue_rate (void)
25380 /* A table and a function to perform ARM-specific name mangling for
25381 NEON vector types in order to conform to the AAPCS (see "Procedure
25382 Call Standard for the ARM Architecture", Appendix A). To qualify
25383 for emission with the mangled names defined in that document, a
25384 vector type must not only be of the correct mode but also be
25385 composed of NEON vector element types (e.g. __builtin_neon_qi). */
25388 enum machine_mode mode
;
25389 const char *element_type_name
;
25390 const char *aapcs_name
;
25391 } arm_mangle_map_entry
;
25393 static arm_mangle_map_entry arm_mangle_map
[] = {
25394 /* 64-bit containerized types. */
25395 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
25396 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25397 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
25398 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25399 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
25400 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
25401 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
25402 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25403 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25404 /* 128-bit containerized types. */
25405 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
25406 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25407 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
25408 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25409 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
25410 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
25411 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
25412 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25413 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25414 { VOIDmode
, NULL
, NULL
}
25418 arm_mangle_type (const_tree type
)
25420 arm_mangle_map_entry
*pos
= arm_mangle_map
;
25422 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25423 has to be managled as if it is in the "std" namespace. */
25424 if (TARGET_AAPCS_BASED
25425 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
25426 return "St9__va_list";
25428 /* Half-precision float. */
25429 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
25432 if (TREE_CODE (type
) != VECTOR_TYPE
)
25435 /* Check the mode of the vector type, and the name of the vector
25436 element type, against the table. */
25437 while (pos
->mode
!= VOIDmode
)
25439 tree elt_type
= TREE_TYPE (type
);
25441 if (pos
->mode
== TYPE_MODE (type
)
25442 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
25443 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
25444 pos
->element_type_name
))
25445 return pos
->aapcs_name
;
25450 /* Use the default mangling for unrecognized (possibly user-defined)
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
25467 /* Adjust register allocation order when compiling for Thumb. */
25470 arm_order_regs_for_local_alloc (void)
25472 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
25473 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
25475 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
25476 sizeof (thumb_core_reg_alloc_order
));
25479 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25482 arm_frame_pointer_required (void)
25484 return (cfun
->has_nonlocal_label
25485 || SUBTARGET_FRAME_POINTER_REQUIRED
25486 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
25489 /* Only thumb1 can't support conditional execution, so return true if
25490 the target is not thumb1. */
25492 arm_have_conditional_execution (void)
25494 return !TARGET_THUMB1
;
25497 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
25498 static HOST_WIDE_INT
25499 arm_vector_alignment (const_tree type
)
25501 HOST_WIDE_INT align
= tree_low_cst (TYPE_SIZE (type
), 0);
25503 if (TARGET_AAPCS_BASED
)
25504 align
= MIN (align
, 64);
25509 static unsigned int
25510 arm_autovectorize_vector_sizes (void)
25512 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
25516 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
25518 /* Vectors which aren't in packed structures will not be less aligned than
25519 the natural alignment of their element type, so this is safe. */
25520 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
25523 return default_builtin_vector_alignment_reachable (type
, is_packed
);
25527 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
25528 const_tree type
, int misalignment
,
25531 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
25533 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
25538 /* If the misalignment is unknown, we should be able to handle the access
25539 so long as it is not to a member of a packed data structure. */
25540 if (misalignment
== -1)
25543 /* Return true if the misalignment is a multiple of the natural alignment
25544 of the vector's element type. This is probably always going to be
25545 true in practice, since we've already established that this isn't a
25547 return ((misalignment
% align
) == 0);
25550 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
25555 arm_conditional_register_usage (void)
25559 if (TARGET_THUMB1
&& optimize_size
)
25561 /* When optimizing for size on Thumb-1, it's better not
25562 to use the HI regs, because of the overhead of
25564 for (regno
= FIRST_HI_REGNUM
;
25565 regno
<= LAST_HI_REGNUM
; ++regno
)
25566 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
25569 /* The link register can be clobbered by any branch insn,
25570 but we have no way to track that at present, so mark
25571 it as unavailable. */
25573 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
25575 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
25577 /* VFPv3 registers are disabled when earlier VFP
25578 versions are selected due to the definition of
25579 LAST_VFP_REGNUM. */
25580 for (regno
= FIRST_VFP_REGNUM
;
25581 regno
<= LAST_VFP_REGNUM
; ++ regno
)
25583 fixed_regs
[regno
] = 0;
25584 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
25585 || regno
>= FIRST_VFP_REGNUM
+ 32;
25589 if (TARGET_REALLY_IWMMXT
)
25591 regno
= FIRST_IWMMXT_GR_REGNUM
;
25592 /* The 2002/10/09 revision of the XScale ABI has wCG0
25593 and wCG1 as call-preserved registers. The 2002/11/21
25594 revision changed this so that all wCG registers are
25595 scratch registers. */
25596 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
25597 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
25598 fixed_regs
[regno
] = 0;
25599 /* The XScale ABI has wR0 - wR9 as scratch registers,
25600 the rest as call-preserved registers. */
25601 for (regno
= FIRST_IWMMXT_REGNUM
;
25602 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
25604 fixed_regs
[regno
] = 0;
25605 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
25609 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
25611 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
25612 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
25614 else if (TARGET_APCS_STACK
)
25616 fixed_regs
[10] = 1;
25617 call_used_regs
[10] = 1;
25619 /* -mcaller-super-interworking reserves r11 for calls to
25620 _interwork_r11_call_via_rN(). Making the register global
25621 is an easy way of ensuring that it remains valid for all
25623 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
25624 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
25626 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
25627 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
25628 if (TARGET_CALLER_INTERWORKING
)
25629 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
25631 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25635 arm_preferred_rename_class (reg_class_t rclass
)
25637 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25638 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
25639 and code size can be reduced. */
25640 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
25646 /* Compute the atrribute "length" of insn "*push_multi".
25647 So this function MUST be kept in sync with that insn pattern. */
25649 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
25651 int i
, regno
, hi_reg
;
25652 int num_saves
= XVECLEN (parallel_op
, 0);
25662 regno
= REGNO (first_op
);
25663 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
25664 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
25666 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
25667 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
25675 /* Compute the number of instructions emitted by output_move_double. */
25677 arm_count_output_move_double_insns (rtx
*operands
)
25681 /* output_move_double may modify the operands array, so call it
25682 here on a copy of the array. */
25683 ops
[0] = operands
[0];
25684 ops
[1] = operands
[1];
25685 output_move_double (ops
, false, &count
);
25690 vfp3_const_double_for_fract_bits (rtx operand
)
25692 REAL_VALUE_TYPE r0
;
25694 if (!CONST_DOUBLE_P (operand
))
25697 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
25698 if (exact_real_inverse (DFmode
, &r0
))
25700 if (exact_real_truncate (DFmode
, &r0
))
25702 HOST_WIDE_INT value
= real_to_integer (&r0
);
25703 value
= value
& 0xffffffff;
25704 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
25705 return int_log2 (value
);
25711 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25714 arm_pre_atomic_barrier (enum memmodel model
)
25716 if (need_atomic_barrier_p (model
, true))
25717 emit_insn (gen_memory_barrier ());
25721 arm_post_atomic_barrier (enum memmodel model
)
25723 if (need_atomic_barrier_p (model
, false))
25724 emit_insn (gen_memory_barrier ());
25727 /* Emit the load-exclusive and store-exclusive instructions. */
25730 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
)
25732 rtx (*gen
) (rtx
, rtx
);
25736 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
25737 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
25738 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
25739 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
25741 gcc_unreachable ();
25744 emit_insn (gen (rval
, mem
));
25748 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
, rtx mem
)
25750 rtx (*gen
) (rtx
, rtx
, rtx
);
25754 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
25755 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
25756 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
25757 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
25759 gcc_unreachable ();
25762 emit_insn (gen (bval
, rval
, mem
));
25765 /* Mark the previous jump instruction as unlikely. */
25768 emit_unlikely_jump (rtx insn
)
25770 rtx very_unlikely
= GEN_INT (REG_BR_PROB_BASE
/ 100 - 1);
25772 insn
= emit_jump_insn (insn
);
25773 add_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
25776 /* Expand a compare and swap pattern. */
25779 arm_expand_compare_and_swap (rtx operands
[])
25781 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
25782 enum machine_mode mode
;
25783 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
25785 bval
= operands
[0];
25786 rval
= operands
[1];
25788 oldval
= operands
[3];
25789 newval
= operands
[4];
25790 is_weak
= operands
[5];
25791 mod_s
= operands
[6];
25792 mod_f
= operands
[7];
25793 mode
= GET_MODE (mem
);
25799 /* For narrow modes, we're going to perform the comparison in SImode,
25800 so do the zero-extension now. */
25801 rval
= gen_reg_rtx (SImode
);
25802 oldval
= convert_modes (SImode
, mode
, oldval
, true);
25806 /* Force the value into a register if needed. We waited until after
25807 the zero-extension above to do this properly. */
25808 if (!arm_add_operand (oldval
, SImode
))
25809 oldval
= force_reg (SImode
, oldval
);
25813 if (!cmpdi_operand (oldval
, mode
))
25814 oldval
= force_reg (mode
, oldval
);
25818 gcc_unreachable ();
25823 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
25824 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
25825 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
25826 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
25828 gcc_unreachable ();
25831 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
25833 if (mode
== QImode
|| mode
== HImode
)
25834 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
25836 /* In all cases, we arrange for success to be signaled by Z set.
25837 This arrangement allows for the boolean result to be used directly
25838 in a subsequent branch, post optimization. */
25839 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
25840 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
25841 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
25844 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25845 another memory store between the load-exclusive and store-exclusive can
25846 reset the monitor from Exclusive to Open state. This means we must wait
25847 until after reload to split the pattern, lest we get a register spill in
25848 the middle of the atomic sequence. */
25851 arm_split_compare_and_swap (rtx operands
[])
25853 rtx rval
, mem
, oldval
, newval
, scratch
;
25854 enum machine_mode mode
;
25855 enum memmodel mod_s
, mod_f
;
25857 rtx label1
, label2
, x
, cond
;
25859 rval
= operands
[0];
25861 oldval
= operands
[2];
25862 newval
= operands
[3];
25863 is_weak
= (operands
[4] != const0_rtx
);
25864 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
25865 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
25866 scratch
= operands
[7];
25867 mode
= GET_MODE (mem
);
25869 arm_pre_atomic_barrier (mod_s
);
25874 label1
= gen_label_rtx ();
25875 emit_label (label1
);
25877 label2
= gen_label_rtx ();
25879 arm_emit_load_exclusive (mode
, rval
, mem
);
25881 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
25882 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
25883 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
25884 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
25885 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
25887 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
);
25889 /* Weak or strong, we want EQ to be true for success, so that we
25890 match the flags that we got from the compare above. */
25891 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
25892 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
25893 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
25897 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
25898 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
25899 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
25900 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
25903 if (mod_f
!= MEMMODEL_RELAXED
)
25904 emit_label (label2
);
25906 arm_post_atomic_barrier (mod_s
);
25908 if (mod_f
== MEMMODEL_RELAXED
)
25909 emit_label (label2
);
25913 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
25914 rtx value
, rtx model_rtx
, rtx cond
)
25916 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
25917 enum machine_mode mode
= GET_MODE (mem
);
25918 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
25921 arm_pre_atomic_barrier (model
);
25923 label
= gen_label_rtx ();
25924 emit_label (label
);
25927 new_out
= gen_lowpart (wmode
, new_out
);
25929 old_out
= gen_lowpart (wmode
, old_out
);
25932 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
25934 arm_emit_load_exclusive (mode
, old_out
, mem
);
25943 x
= gen_rtx_AND (wmode
, old_out
, value
);
25944 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
25945 x
= gen_rtx_NOT (wmode
, new_out
);
25946 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
25950 if (CONST_INT_P (value
))
25952 value
= GEN_INT (-INTVAL (value
));
25958 if (mode
== DImode
)
25960 /* DImode plus/minus need to clobber flags. */
25961 /* The adddi3 and subdi3 patterns are incorrectly written so that
25962 they require matching operands, even when we could easily support
25963 three operands. Thankfully, this can be fixed up post-splitting,
25964 as the individual add+adc patterns do accept three operands and
25965 post-reload cprop can make these moves go away. */
25966 emit_move_insn (new_out
, old_out
);
25968 x
= gen_adddi3 (new_out
, new_out
, value
);
25970 x
= gen_subdi3 (new_out
, new_out
, value
);
25977 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
25978 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
25982 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
));
25984 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
25985 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
25987 arm_post_atomic_barrier (model
);
25990 #define MAX_VECT_LEN 16
25992 struct expand_vec_perm_d
25994 rtx target
, op0
, op1
;
25995 unsigned char perm
[MAX_VECT_LEN
];
25996 enum machine_mode vmode
;
25997 unsigned char nelt
;
26002 /* Generate a variable permutation. */
26005 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
26007 enum machine_mode vmode
= GET_MODE (target
);
26008 bool one_vector_p
= rtx_equal_p (op0
, op1
);
26010 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
26011 gcc_checking_assert (GET_MODE (op0
) == vmode
);
26012 gcc_checking_assert (GET_MODE (op1
) == vmode
);
26013 gcc_checking_assert (GET_MODE (sel
) == vmode
);
26014 gcc_checking_assert (TARGET_NEON
);
26018 if (vmode
== V8QImode
)
26019 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
26021 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
26027 if (vmode
== V8QImode
)
26029 pair
= gen_reg_rtx (V16QImode
);
26030 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
26031 pair
= gen_lowpart (TImode
, pair
);
26032 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
26036 pair
= gen_reg_rtx (OImode
);
26037 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
26038 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
26044 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
26046 enum machine_mode vmode
= GET_MODE (target
);
26047 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
26048 bool one_vector_p
= rtx_equal_p (op0
, op1
);
26049 rtx rmask
[MAX_VECT_LEN
], mask
;
26051 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26052 numbering of elements for big-endian, we must reverse the order. */
26053 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
26055 /* The VTBL instruction does not use a modulo index, so we must take care
26056 of that ourselves. */
26057 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
26058 for (i
= 0; i
< nelt
; ++i
)
26060 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
26061 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
26063 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
26066 /* Generate or test for an insn that supports a constant permutation. */
26068 /* Recognize patterns for the VUZP insns. */
26071 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
26073 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
26074 rtx out0
, out1
, in0
, in1
, x
;
26075 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
26077 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
26080 /* Note that these are little-endian tests. Adjust for big-endian later. */
26081 if (d
->perm
[0] == 0)
26083 else if (d
->perm
[0] == 1)
26087 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
26089 for (i
= 0; i
< nelt
; i
++)
26091 unsigned elt
= (i
* 2 + odd
) & mask
;
26092 if (d
->perm
[i
] != elt
)
26102 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
26103 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
26104 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
26105 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
26106 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
26107 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
26108 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
26109 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
26111 gcc_unreachable ();
26116 if (BYTES_BIG_ENDIAN
)
26118 x
= in0
, in0
= in1
, in1
= x
;
26123 out1
= gen_reg_rtx (d
->vmode
);
26125 x
= out0
, out0
= out1
, out1
= x
;
26127 emit_insn (gen (out0
, in0
, in1
, out1
));
26131 /* Recognize patterns for the VZIP insns. */
26134 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
26136 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
26137 rtx out0
, out1
, in0
, in1
, x
;
26138 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
26140 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
26143 /* Note that these are little-endian tests. Adjust for big-endian later. */
26145 if (d
->perm
[0] == high
)
26147 else if (d
->perm
[0] == 0)
26151 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
26153 for (i
= 0; i
< nelt
/ 2; i
++)
26155 unsigned elt
= (i
+ high
) & mask
;
26156 if (d
->perm
[i
* 2] != elt
)
26158 elt
= (elt
+ nelt
) & mask
;
26159 if (d
->perm
[i
* 2 + 1] != elt
)
26169 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
26170 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
26171 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
26172 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
26173 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
26174 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
26175 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
26176 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
26178 gcc_unreachable ();
26183 if (BYTES_BIG_ENDIAN
)
26185 x
= in0
, in0
= in1
, in1
= x
;
26190 out1
= gen_reg_rtx (d
->vmode
);
26192 x
= out0
, out0
= out1
, out1
= x
;
26194 emit_insn (gen (out0
, in0
, in1
, out1
));
26198 /* Recognize patterns for the VREV insns. */
26201 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
26203 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
26204 rtx (*gen
)(rtx
, rtx
, rtx
);
26206 if (!d
->one_vector_p
)
26215 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
26216 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
26224 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
26225 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
26226 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
26227 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
26235 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
26236 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
26237 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
26238 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
26239 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
26240 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
26241 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
26242 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
26251 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
26252 for (j
= 0; j
<= diff
; j
+= 1)
26254 /* This is guaranteed to be true as the value of diff
26255 is 7, 3, 1 and we should have enough elements in the
26256 queue to generate this. Getting a vector mask with a
26257 value of diff other than these values implies that
26258 something is wrong by the time we get here. */
26259 gcc_assert (i
+ j
< nelt
);
26260 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
26268 /* ??? The third operand is an artifact of the builtin infrastructure
26269 and is ignored by the actual instruction. */
26270 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
26274 /* Recognize patterns for the VTRN insns. */
/* NOTE(review): truncated extract -- return type, switch header,
   several statements and braces are missing between the numbered
   fragments.  Comments cover only what is visible here.  */
26277 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
/* odd selects which transpose half the mask describes (first element
   0 or 1, tested below); mask wraps indices for the one-vector case.  */
26279 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
26280 rtx out0
, out1
, in0
, in1
, x
;
/* Generator for the chosen neon_vtrn*_internal pattern (four rtx
   operands: two outputs, two inputs -- see the emit at the end).  */
26281 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
/* VTRN operates on elements no wider than 32 bits.  */
26283 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
26286 /* Note that these are little-endian tests. Adjust for big-endian later. */
26287 if (d
->perm
[0] == 0)
26289 else if (d
->perm
[0] == 1)
/* For a single input vector, indices fold modulo nelt.  */
26293 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* Check the mask matches the VTRN element interleaving pattern.  */
26295 for (i
= 0; i
< nelt
; i
+= 2)
26297 if (d
->perm
[i
] != i
+ odd
)
26299 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
/* Mode dispatch (switch header not visible in this extract).  */
26309 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
26310 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
26311 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
26312 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
26313 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
26314 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
26315 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
26316 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
26318 gcc_unreachable ();
/* Big-endian: swap the input pair (the preceding setup of in0/in1 and
   out0 is not visible in this extract).  */
26323 if (BYTES_BIG_ENDIAN
)
26325 x
= in0
, in0
= in1
, in1
= x
;
26330 out1
= gen_reg_rtx (d
->vmode
);
/* Big-endian: swap the output pair to match the swapped inputs.  */
26332 x
= out0
, out0
= out1
, out1
= x
;
26334 emit_insn (gen (out0
, in0
, in1
, out1
));
26338 /* Recognize patterns for the VEXT insns. */
/* NOTE(review): truncated extract -- return type, switch header, the
   declaration of `offset', braces and several statements are missing
   between the numbered fragments.  Comments cover only what is
   visible here.  */
26341 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
26343 unsigned int i
, nelt
= d
->nelt
;
/* Generator for the chosen neon_vext* pattern (dest, op0, op1,
   immediate byte/element offset).  */
26344 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
/* location is the VEXT immediate: the index of the first extracted
   element (set from d->perm[0] below).  */
26347 unsigned int location
;
/* next tracks the index each successive mask element must equal for
   the mask to describe a contiguous extraction.  */
26349 unsigned int next
= d
->perm
[0] + 1;
26351 /* TODO: Handle GCC's numbering of elements for big-endian. */
26352 if (BYTES_BIG_ENDIAN
)
26355 /* Check if the extracted indexes are increasing by one. */
26356 for (i
= 1; i
< nelt
; next
++, i
++)
26358 /* If we hit the most significant element of the 2nd vector in
26359 the previous iteration, no need to test further. */
26360 if (next
== 2 * nelt
)
26363 /* If we are operating on only one vector: it could be a
26364 rotation. If there are only two elements of size < 64, let
26365 arm_evpc_neon_vrev catch it. */
26366 if (d
->one_vector_p
&& (next
== nelt
))
26368 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
26374 if (d
->perm
[i
] != next
)
26378 location
= d
->perm
[0];
/* Mode dispatch (switch header not visible in this extract).  */
26382 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
26383 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
26384 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
26385 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
26386 case V2SImode
: gen
= gen_neon_vextv2si
; break;
26387 case V4SImode
: gen
= gen_neon_vextv4si
; break;
26388 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
26389 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
26390 case V2DImode
: gen
= gen_neon_vextv2di
; break;
/* Emit the VEXT with the starting element index as immediate.  */
26399 offset
= GEN_INT (location
);
26400 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
26404 /* The NEON VTBL instruction is a fully variable permutation that's even
26405 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
26406 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
26407 can do slightly better by expanding this as a constant where we don't
26408 have to apply a mask. */
/* NOTE(review): truncated extract -- return type, the big-endian
   handling body and the `return' statements are missing between the
   numbered fragments.  */
26411 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
/* rperm holds the selector elements as CONST_INTs; sel becomes the
   constant selector vector forced into a register.  */
26413 rtx rperm
[MAX_VECT_LEN
], sel
;
26414 enum machine_mode vmode
= d
->vmode
;
26415 unsigned int i
, nelt
= d
->nelt
;
26417 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26418 numbering of elements for big-endian, we must reverse the order. */
26419 if (BYTES_BIG_ENDIAN
)
26425 /* Generic code will try constant permutation twice. Once with the
26426 original mode and again with the elements lowered to QImode.
26427 So wait and don't do the selector expansion ourselves. */
26428 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
/* Build the constant selector from the permutation indices.  */
26431 for (i
= 0; i
< nelt
; ++i
)
26432 rperm
[i
] = GEN_INT (d
->perm
[i
]);
26433 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
26434 sel
= force_reg (vmode
, sel
);
/* Hand off to the generic variable-permute expander with the
   pre-built selector.  */
26436 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
/* Try each VEC_PERM_EXPR pattern recognizer in turn, falling back to
   the fully general VTBL expansion.  NOTE(review): truncated extract --
   return type, `return true' statements and some braces are missing
   between the numbered fragments.  */
26441 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
26443 /* Check if the input mask matches vext before reordering the
   operands (comment closing is missing in this extract).  */
26446 if (arm_evpc_neon_vext (d
))
26449 /* The pattern matching functions above are written to look for a small
26450 number to begin the sequence (0, 1, N/2). If we begin with an index
26451 from the second operand, we can swap the operands. */
26452 if (d
->perm
[0] >= d
->nelt
)
26454 unsigned i
, nelt
= d
->nelt
;
/* Rotate every index by nelt, modulo the two-vector range, which is
   equivalent to swapping op0/op1 (the swap itself is in lines not
   visible here).  */
26457 for (i
= 0; i
< nelt
; ++i
)
26458 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
/* Cheaper single-instruction patterns first; VTBL is the catch-all.  */
26467 if (arm_evpc_neon_vuzp (d
))
26469 if (arm_evpc_neon_vzip (d
))
26471 if (arm_evpc_neon_vrev (d
))
26473 if (arm_evpc_neon_vtrn (d
))
26475 return arm_evpc_neon_vtbl (d
);
26480 /* Expand a vec_perm_const pattern. */
/* NOTE(review): truncated extract -- return type, the copy of the
   selector into d.perm, the `which' switch and several braces are
   missing between the numbered fragments.  */
26483 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
26485 struct expand_vec_perm_d d
;
26486 int i
, nelt
, which
;
/* Fill in the descriptor from the target operand; this is the real
   expansion path, so testing_p is false.  */
26492 d
.vmode
= GET_MODE (target
);
26493 gcc_assert (VECTOR_MODE_P (d
.vmode
));
26494 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
26495 d
.testing_p
= false;
/* Categorize the selector: bit 0 of `which' = uses first operand,
   bit 1 = uses second operand.  Indices wrap modulo 2*nelt.  */
26497 for (i
= which
= 0; i
< nelt
; ++i
)
26499 rtx e
= XVECEXP (sel
, 0, i
);
26500 int ei
= INTVAL (e
) & (2 * nelt
- 1);
26501 which
|= (ei
< nelt
? 1 : 2);
26511 d
.one_vector_p
= false;
26512 if (!rtx_equal_p (op0
, op1
))
26515 /* The elements of PERM do not suggest that only the first operand
26516 is used, but both operands are identical. Allow easier matching
26517 of the permutation by folding the permutation into the single
   operand (comment closing is missing in this extract).  */
26521 for (i
= 0; i
< nelt
; ++i
)
26522 d
.perm
[i
] &= nelt
- 1;
26524 d
.one_vector_p
= true;
26529 d
.one_vector_p
= true;
26533 return arm_expand_vec_perm_const_1 (&d
);
26536 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
/* NOTE(review): truncated extract -- return type, the assignment of
   d.vmode, the `ret' declaration, the fold of second-vector elements
   and the final return are missing between the numbered fragments.  */
26539 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
26540 const unsigned char *sel
)
26542 struct expand_vec_perm_d d
;
26543 unsigned int i
, nelt
, which
;
/* Dry-run descriptor: testing_p true means recognizers must not emit
   any insns, only report whether the pattern would match.  */
26547 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
26548 d
.testing_p
= true;
26549 memcpy (d
.perm
, sel
, nelt
);
26551 /* Categorize the set of elements in the selector. */
26552 for (i
= which
= 0; i
< nelt
; ++i
)
26554 unsigned char e
= d
.perm
[i
];
26555 gcc_assert (e
< 2 * nelt
);
26556 which
|= (e
< nelt
? 1 : 2);
26559 /* For all elements from second vector, fold the elements to first. */
26561 for (i
= 0; i
< nelt
; ++i
)
26564 /* Check whether the mask can be applied to the vector type. */
26565 d
.one_vector_p
= (which
!= 3);
/* Fake operands: raw registers past the virtuals so the recognizers
   have something well-formed to inspect without emitting code.  */
26567 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
26568 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
26569 if (!d
.one_vector_p
)
26570 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
26573 ret
= arm_expand_vec_perm_const_1 (&d
);
/* Decide whether auto-increment addressing of kind CODE is worthwhile
   for MODE.  NOTE(review): truncated extract -- return type, the
   switch over CODE, `return' statements and braces are missing
   between the numbered fragments, so the mapping from each test to
   its result cannot be confirmed from this view.  */
26580 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
26582 /* If we are soft float and we do not have ldrd
26583 then all auto increment forms are ok. */
26584 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
26589 /* Post increment and Pre Decrement are supported for all
26590 instruction forms except for vector forms. */
26593 if (VECTOR_MODE_P (mode
))
26595 if (code
!= ARM_PRE_DEC
)
26605 /* Without LDRD and mode size greater than
26606 word size, there is no point in auto-incrementing
26607 because ldm and stm will not have these forms. */
26608 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
26611 /* Vector and floating point modes do not support
26612 these auto increment forms. */
26613 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
26626 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
26627 on ARM, since we know that shifts by negative amounts are no-ops.
26628 Additionally, the default expansion code is not available or suitable
26629 for post-reload insn splits (this can occur when the register allocator
26630 chooses not to do a shift in NEON).
26632 This function is used in both initial expand and post-reload splits, and
26633 handles all kinds of 64-bit shifts.
26635 Input requirements:
26636 - It is safe for the input and output to be the same register, but
26637 early-clobber rules apply for the shift amount and scratch registers.
26638 - Shift by register requires both scratch registers. In all other cases
26639 the scratch registers may be NULL.
26640 - Ashiftrt by a register also clobbers the CC register. */
/* NOTE(review): truncated extract -- the return type, several assert
   headers (the gcc_assert openings before the `&&' fragments), the
   ORR macro definition line, if/else braces, the switch over CODE in
   the scratch setup, and the trailing #undef lines are all missing
   between the numbered fragments.  Comments here annotate only what
   is visible.  */
26642 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
26643 rtx amount
, rtx scratch1
, rtx scratch2
)
/* Split the DImode input/output pairs into their SImode halves.  */
26645 rtx out_high
= gen_highpart (SImode
, out
);
26646 rtx out_low
= gen_lowpart (SImode
, out
);
26647 rtx in_high
= gen_highpart (SImode
, in
);
26648 rtx in_low
= gen_lowpart (SImode
, in
);
26651 in = the register pair containing the input value.
26652 out = the destination register pair.
26653 up = the high- or low-part of each pair.
26654 down = the opposite part to "up".
26655 In a shift, we can consider bits to shift from "up"-stream to
26656 "down"-stream, so in a left-shift "up" is the low-part and "down"
26657 is the high-part of each register pair. */
26659 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
26660 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
26661 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
26662 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
/* Sanity-check operand shapes; the assert openings for the `&&'
   fragments below are missing from this extract.  */
26664 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
26666 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
26667 && GET_MODE (out
) == DImode
);
26669 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
26670 && GET_MODE (in
) == DImode
);
26672 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
26673 && GET_MODE (amount
) == SImode
)
26674 || CONST_INT_P (amount
)));
26675 gcc_assert (scratch1
== NULL
26676 || (GET_CODE (scratch1
) == SCRATCH
)
26677 || (GET_MODE (scratch1
) == SImode
26678 && REG_P (scratch1
)));
26679 gcc_assert (scratch2
== NULL
26680 || (GET_CODE (scratch2
) == SCRATCH
)
26681 || (GET_MODE (scratch2
) == SImode
26682 && REG_P (scratch2
)));
/* Early-clobber check: OUT must not overlap AMOUNT when both are hard
   registers (OUT occupies two consecutive registers in DImode).  */
26683 gcc_assert (!REG_P (out
) || !REG_P (amount
)
26684 || !HARD_REGISTER_P (out
)
26685 || (REGNO (out
) != REGNO (amount
)
26686 && REGNO (out
) + 1 != REGNO (amount
)))
;
26688 /* Macros to make following code more readable. */
26689 #define SUB_32(DEST,SRC) \
26690 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
26691 #define RSB_32(DEST,SRC) \
26692 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
26693 #define SUB_S_32(DEST,SRC) \
26694 gen_addsi3_compare0 ((DEST), (SRC), \
26696 #define SET(DEST,SRC) \
26697 gen_rtx_SET (SImode, (DEST), (SRC))
26698 #define SHIFT(CODE,SRC,AMOUNT) \
26699 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26700 #define LSHIFT(CODE,SRC,AMOUNT) \
26701 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26702 SImode, (SRC), (AMOUNT))
26703 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26704 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26705 SImode, (SRC), (AMOUNT))
26707 gen_rtx_IOR (SImode, (A), (B))
26708 #define BRANCH(COND,LABEL) \
26709 gen_arm_cond_branch ((LABEL), \
26710 gen_rtx_ ## COND (CCmode, cc_reg, \
26714 /* Shifts by register and shifts by constant are handled separately. */
26715 if (CONST_INT_P (amount
))
26717 /* We have a shift-by-constant. */
26719 /* First, handle out-of-range shift amounts.
26720 In both cases we try to match the result an ARM instruction in a
26721 shift-by-register would give. This helps reduce execution
26722 differences between optimization levels, but it won't stop other
26723 parts of the compiler doing different things. This is "undefined
26724 behaviour", in any case. */
26725 if (INTVAL (amount
) <= 0)
26726 emit_insn (gen_movdi (out
, in
));
26727 else if (INTVAL (amount
) >= 64)
/* Shift >= 64: arithmetic right fills with the sign (shift by 31
   twice); logical shifts produce zero.  */
26729 if (code
== ASHIFTRT
)
26731 rtx const31_rtx
= GEN_INT (31);
26732 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
26733 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
26736 emit_insn (gen_movdi (out
, const0_rtx
));
26739 /* Now handle valid shifts. */
26740 else if (INTVAL (amount
) < 32)
26742 /* Shifts by a constant less than 32. */
26743 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
/* Combine bits that cross the 32-bit boundary into out_down.  */
26745 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
26746 emit_insn (SET (out_down
,
26747 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
26749 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
26753 /* Shifts by a constant greater than 31. */
26754 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
26756 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
26757 if (code
== ASHIFTRT
)
/* Arithmetic right: replicate the sign into the upper word.  */
26758 emit_insn (gen_ashrsi3 (out_up
, in_up
,
26761 emit_insn (SET (out_up
, const0_rtx
));
26766 /* We have a shift-by-register. */
26767 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
26769 /* This alternative requires the scratch registers. */
26770 gcc_assert (scratch1
&& REG_P (scratch1
));
26771 gcc_assert (scratch2
&& REG_P (scratch2
));
26773 /* We will need the values "amount-32" and "32-amount" later.
26774 Swapping them around now allows the later code to be more general. */
/* The switch over CODE selecting among the three setups below is not
   visible in this extract.  */
26778 emit_insn (SUB_32 (scratch1
, amount
));
26779 emit_insn (RSB_32 (scratch2
, amount
));
26782 emit_insn (RSB_32 (scratch1
, amount
));
26783 /* Also set CC = amount > 32. */
26784 emit_insn (SUB_S_32 (scratch2
, amount
));
26787 emit_insn (RSB_32 (scratch1
, amount
));
26788 emit_insn (SUB_32 (scratch2
, amount
));
26791 gcc_unreachable ();
26794 /* Emit code like this:
26797 out_down = in_down << amount;
26798 out_down = (in_up << (amount - 32)) | out_down;
26799 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26800 out_up = in_up << amount;
26803 out_down = in_down >> amount;
26804 out_down = (in_up << (32 - amount)) | out_down;
26806 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26807 out_up = in_up << amount;
26810 out_down = in_down >> amount;
26811 out_down = (in_up << (32 - amount)) | out_down;
26813 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26814 out_up = in_up << amount;
26816 The ARM and Thumb2 variants are the same but implemented slightly
26817 differently. If this were only called during expand we could just
26818 use the Thumb2 case and let combine do the right thing, but this
26819 can also be called from post-reload splitters. */
/* NOTE(review): the "out_up = in_up << amount;" lines above for the
   two right-shift variants look garbled in this extract -- the final
   SET below uses SHIFT(code, ...), i.e. a right shift for those
   cases; confirm the comment against the complete source.  */
26821 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
26823 if (!TARGET_THUMB2
)
26825 /* Emit code for ARM mode. */
26826 emit_insn (SET (out_down
,
26827 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
26828 if (code
== ASHIFTRT
)
/* Skip the sign-fill correction when amount < 32 (CC set above).  */
26830 rtx done_label
= gen_label_rtx ();
26831 emit_jump_insn (BRANCH (LT
, done_label
));
26832 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
26834 emit_label (done_label
);
26837 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
26842 /* Emit code for Thumb2 mode.
26843 Thumb2 can't do shift and or in one insn. */
26844 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
26845 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
26847 if (code
== ASHIFTRT
)
26849 rtx done_label
= gen_label_rtx ();
26850 emit_jump_insn (BRANCH (LT
, done_label
));
26851 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
26852 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
26853 emit_label (done_label
);
26857 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
26858 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
/* Finally the upper word gets the plain single-word shift.  */
26862 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
26877 /* Return true if *COMPARISON is a valid comparison operation, and
26878 force its operands into a form the insn patterns accept. */
/* NOTE(review): truncated extract -- return type, the switch over
   MODE that selects among the three operand-legitimizing sections
   below, `return' statements and braces are missing between the
   numbered fragments.  */
26880 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
26882 enum rtx_code code
= GET_CODE (*comparison
);
26883 enum rtx_code canonical_code
;
/* Take the mode from whichever operand has one (constants are
   VOIDmode); the assert below guarantees at least one does.  */
26884 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
26885 ? GET_MODE (*op2
) : GET_MODE (*op1
);
26887 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
/* UNEQ/LTGT are rejected here (the rejection body is not visible in
   this extract).  */
26889 if (code
== UNEQ
|| code
== LTGT
)
/* Canonicalize the comparison code, possibly swapping/adjusting the
   operands, and write it back into the comparison rtx.  */
26892 canonical_code
= arm_canonicalize_comparison (code
, op1
, op2
);
26893 PUT_CODE (*comparison
, canonical_code
);
/* Integer-mode path: operands must satisfy arm_add_operand.  */
26898 if (!arm_add_operand (*op1
, mode
))
26899 *op1
= force_reg (mode
, *op1
);
26900 if (!arm_add_operand (*op2
, mode
))
26901 *op2
= force_reg (mode
, *op2
);
/* DImode path: operands must satisfy cmpdi_operand.  */
26905 if (!cmpdi_operand (*op1
, mode
))
26906 *op1
= force_reg (mode
, *op1
);
26907 if (!cmpdi_operand (*op2
, mode
))
26908 *op2
= force_reg (mode
, *op2
);
/* Floating-point path: operands must satisfy
   arm_float_compare_operand.  */
26913 if (!arm_float_compare_operand (*op1
, mode
))
26914 *op1
= force_reg (mode
, *op1
);
26915 if (!arm_float_compare_operand (*op2
, mode
))
26916 *op2
= force_reg (mode
, *op2
);
26926 #include "gt-arm.h"