1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "ggc.h"
48 #include "except.h"
49 #include "c-family/c-pragma.h" /* ??? */
50 #include "integrate.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
64
65 void (*arm_lang_output_object_attributes_hook)(void);
66
67 /* Forward function declarations. */
68 static int arm_compute_static_chain_stack_bytes (void);
69 static arm_stack_offsets *arm_get_frame_offsets (void);
70 static void arm_add_gc_roots (void);
71 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
72 HOST_WIDE_INT, rtx, rtx, int, int);
73 static unsigned bit_count (unsigned long);
74 static int arm_address_register_rtx_p (rtx, int);
75 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
76 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
77 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
78 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
79 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
80 inline static int thumb1_index_register_rtx_p (rtx, int);
81 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
82 static int thumb_far_jump_used_p (void);
83 static bool thumb_force_lr_save (void);
84 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
85 static rtx emit_sfm (int, int);
86 static unsigned arm_size_return_regs (void);
87 static bool arm_assemble_integer (rtx, unsigned int, int);
88 static void arm_print_operand (FILE *, rtx, int);
89 static void arm_print_operand_address (FILE *, rtx);
90 static bool arm_print_operand_punct_valid_p (unsigned char code);
91 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
92 static arm_cc get_arm_condition_code (rtx);
93 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
94 static rtx is_jump_table (rtx);
95 static const char *output_multi_immediate (rtx *, const char *, const char *,
96 int, HOST_WIDE_INT);
97 static const char *shift_op (rtx, HOST_WIDE_INT *);
98 static struct machine_function *arm_init_machine_status (void);
99 static void thumb_exit (FILE *, int);
100 static rtx is_jump_table (rtx);
101 static HOST_WIDE_INT get_jump_table_size (rtx);
102 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
103 static Mnode *add_minipool_forward_ref (Mfix *);
104 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
105 static Mnode *add_minipool_backward_ref (Mfix *);
106 static void assign_minipool_offsets (Mfix *);
107 static void arm_print_value (FILE *, rtx);
108 static void dump_minipool (rtx);
109 static int arm_barrier_cost (rtx);
110 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
111 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
112 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
113 rtx);
114 static void arm_reorg (void);
115 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
116 static unsigned long arm_compute_save_reg0_reg12_mask (void);
117 static unsigned long arm_compute_save_reg_mask (void);
118 static unsigned long arm_isr_value (tree);
119 static unsigned long arm_compute_func_type (void);
120 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
121 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
123 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
124 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
125 #endif
126 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
127 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
128 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int count_insns_for_constant (HOST_WIDE_INT, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree, tree);
135 static enum machine_mode arm_promote_function_mode (const_tree,
136 enum machine_mode, int *,
137 const_tree, int);
138 static bool arm_return_in_memory (const_tree, const_tree);
139 static rtx arm_function_value (const_tree, const_tree, bool);
140 static rtx arm_libcall_value (enum machine_mode, const_rtx);
141
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
144 tree);
145 static bool arm_have_conditional_execution (void);
146 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
147 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
148 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
149 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_rtx_costs (rtx, int, int, int *, bool);
153 static int arm_address_cost (rtx, bool);
154 static bool arm_memory_load_p (rtx);
155 static bool arm_cirrus_insn_p (rtx);
156 static void cirrus_reorg (rtx);
157 static void arm_init_builtins (void);
158 static void arm_init_iwmmxt_builtins (void);
159 static rtx safe_vector_operand (rtx, enum machine_mode);
160 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
161 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
162 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
163 static void emit_constant_insn (rtx cond, rtx pattern);
164 static rtx emit_set_insn (rtx, rtx);
165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
166 tree, bool);
167 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
168 const_tree, bool);
169 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
170 const_tree, bool);
171 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
172 const_tree);
173 static int aapcs_select_return_coproc (const_tree, const_tree);
174
175 #ifdef OBJECT_FORMAT_ELF
176 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
177 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
178 #endif
179 #ifndef ARM_PE
180 static void arm_encode_section_info (tree, rtx, int);
181 #endif
182
183 static void arm_file_end (void);
184 static void arm_file_start (void);
185
186 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
187 tree, int *, int);
188 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
189 enum machine_mode, const_tree, bool);
190 static bool arm_promote_prototypes (const_tree);
191 static bool arm_default_short_enums (void);
192 static bool arm_align_anon_bitfield (void);
193 static bool arm_return_in_msb (const_tree);
194 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
195 static bool arm_return_in_memory (const_tree, const_tree);
196 #if ARM_UNWIND_INFO
197 static void arm_unwind_emit (FILE *, rtx);
198 static bool arm_output_ttype (rtx);
199 static void arm_asm_emit_except_personality (rtx);
200 static void arm_asm_init_sections (void);
201 #endif
202 static enum unwind_info_type arm_except_unwind_info (void);
203 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
204 static rtx arm_dwarf_register_span (rtx);
205
206 static tree arm_cxx_guard_type (void);
207 static bool arm_cxx_guard_mask_bit (void);
208 static tree arm_get_cookie_size (tree);
209 static bool arm_cookie_has_size (void);
210 static bool arm_cxx_cdtor_returns_this (void);
211 static bool arm_cxx_key_method_may_be_inline (void);
212 static void arm_cxx_determine_class_data_visibility (tree);
213 static bool arm_cxx_class_data_always_comdat (void);
214 static bool arm_cxx_use_aeabi_atexit (void);
215 static void arm_init_libfuncs (void);
216 static tree arm_build_builtin_va_list (void);
217 static void arm_expand_builtin_va_start (tree, rtx);
218 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
219 static void arm_option_override (void);
220 static void arm_option_optimization (int, int);
221 static bool arm_handle_option (size_t, const char *, int);
222 static void arm_target_help (void);
223 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
224 static bool arm_cannot_copy_insn_p (rtx);
225 static bool arm_tls_symbol_p (rtx x);
226 static int arm_issue_rate (void);
227 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
228 static bool arm_output_addr_const_extra (FILE *, rtx);
229 static bool arm_allocate_stack_slots_for_args (void);
230 static const char *arm_invalid_parameter_type (const_tree t);
231 static const char *arm_invalid_return_type (const_tree t);
232 static tree arm_promoted_type (const_tree t);
233 static tree arm_convert_to_type (tree type, tree expr);
234 static bool arm_scalar_mode_supported_p (enum machine_mode);
235 static bool arm_frame_pointer_required (void);
236 static bool arm_can_eliminate (const int, const int);
237 static void arm_asm_trampoline_template (FILE *);
238 static void arm_trampoline_init (rtx, tree, rtx);
239 static rtx arm_trampoline_adjust_address (rtx);
240 static rtx arm_pic_static_addr (rtx orig, rtx reg);
241 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
242 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
244 static bool arm_class_likely_spilled_p (reg_class_t);
245 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
246 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
247 const_tree type,
248 int misalignment,
249 bool is_packed);
250
251 \f
252 /* Table of machine attributes. */
253 static const struct attribute_spec arm_attribute_table[] =
254 {
255 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
256 /* Function calls made to this symbol must be done indirectly, because
257 it may lie outside of the 26 bit addressing range of a normal function
258 call. */
259 { "long_call", 0, 0, false, true, true, NULL },
260 /* Whereas these functions are always known to reside within the 26 bit
261 addressing range. */
262 { "short_call", 0, 0, false, true, true, NULL },
263 /* Specify the procedure call conventions for a function. */
264 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
265 /* Interrupt Service Routines have special prologue and epilogue requirements. */
266 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
267 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
268 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
269 #ifdef ARM_PE
270 /* ARM/PE has three new attributes:
271 interfacearm - ?
272 dllexport - for exporting a function/variable that will live in a dll
273 dllimport - for importing a function/variable from a dll
274
275 Microsoft allows multiple declspecs in one __declspec, separating
276 them with spaces. We do NOT support this. Instead, use __declspec
277 multiple times.
278 */
279 { "dllimport", 0, 0, true, false, false, NULL },
280 { "dllexport", 0, 0, true, false, false, NULL },
281 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
282 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
283 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
284 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
285 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
286 #endif
287 { NULL, 0, 0, false, false, false, NULL }
288 };
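/* Illustrative sketch (not part of the build): how the attributes in the
   table above appear in user code.  The declarations and names below are
   hypothetical examples, not taken from any real source.  */
#if 0
/* Call indirectly; the callee may lie outside the normal branch range.  */
extern void far_away_handler (void) __attribute__ ((long_call));

/* Callee is known to reside within the normal branch range.  */
extern void nearby_helper (void) __attribute__ ((short_call));

/* Interrupt service routine; gets the special prologue and epilogue.  */
void irq_entry (void) __attribute__ ((interrupt ("IRQ")));

/* No prologue or epilogue is generated; the body must be written in asm.  */
void reset_stub (void) __attribute__ ((naked));
#endif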
289 \f
290 /* Initialize the GCC target structure. */
291 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
292 #undef TARGET_MERGE_DECL_ATTRIBUTES
293 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
294 #endif
295
296 #undef TARGET_LEGITIMIZE_ADDRESS
297 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
298
299 #undef TARGET_ATTRIBUTE_TABLE
300 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
301
302 #undef TARGET_ASM_FILE_START
303 #define TARGET_ASM_FILE_START arm_file_start
304 #undef TARGET_ASM_FILE_END
305 #define TARGET_ASM_FILE_END arm_file_end
306
307 #undef TARGET_ASM_ALIGNED_SI_OP
308 #define TARGET_ASM_ALIGNED_SI_OP NULL
309 #undef TARGET_ASM_INTEGER
310 #define TARGET_ASM_INTEGER arm_assemble_integer
311
312 #undef TARGET_PRINT_OPERAND
313 #define TARGET_PRINT_OPERAND arm_print_operand
314 #undef TARGET_PRINT_OPERAND_ADDRESS
315 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
316 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
317 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
318
319 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
320 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
321
322 #undef TARGET_ASM_FUNCTION_PROLOGUE
323 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
324
325 #undef TARGET_ASM_FUNCTION_EPILOGUE
326 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
327
328 #undef TARGET_DEFAULT_TARGET_FLAGS
329 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
330 #undef TARGET_HANDLE_OPTION
331 #define TARGET_HANDLE_OPTION arm_handle_option
332 #undef TARGET_HELP
333 #define TARGET_HELP arm_target_help
334 #undef TARGET_OPTION_OVERRIDE
335 #define TARGET_OPTION_OVERRIDE arm_option_override
336 #undef TARGET_OPTION_OPTIMIZATION
337 #define TARGET_OPTION_OPTIMIZATION arm_option_optimization
338
339 #undef TARGET_COMP_TYPE_ATTRIBUTES
340 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
341
342 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
343 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
344
345 #undef TARGET_SCHED_ADJUST_COST
346 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
347
348 #undef TARGET_ENCODE_SECTION_INFO
349 #ifdef ARM_PE
350 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
351 #else
352 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
353 #endif
354
355 #undef TARGET_STRIP_NAME_ENCODING
356 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
357
358 #undef TARGET_ASM_INTERNAL_LABEL
359 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
360
361 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
362 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
363
364 #undef TARGET_FUNCTION_VALUE
365 #define TARGET_FUNCTION_VALUE arm_function_value
366
367 #undef TARGET_LIBCALL_VALUE
368 #define TARGET_LIBCALL_VALUE arm_libcall_value
369
370 #undef TARGET_ASM_OUTPUT_MI_THUNK
371 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
372 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
373 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
374
375 #undef TARGET_RTX_COSTS
376 #define TARGET_RTX_COSTS arm_rtx_costs
377 #undef TARGET_ADDRESS_COST
378 #define TARGET_ADDRESS_COST arm_address_cost
379
380 #undef TARGET_SHIFT_TRUNCATION_MASK
381 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
382 #undef TARGET_VECTOR_MODE_SUPPORTED_P
383 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
384 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
385 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
386
387 #undef TARGET_MACHINE_DEPENDENT_REORG
388 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
389
390 #undef TARGET_INIT_BUILTINS
391 #define TARGET_INIT_BUILTINS arm_init_builtins
392 #undef TARGET_EXPAND_BUILTIN
393 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
394
395 #undef TARGET_INIT_LIBFUNCS
396 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
397
398 #undef TARGET_PROMOTE_FUNCTION_MODE
399 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
400 #undef TARGET_PROMOTE_PROTOTYPES
401 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
402 #undef TARGET_PASS_BY_REFERENCE
403 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
404 #undef TARGET_ARG_PARTIAL_BYTES
405 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
406 #undef TARGET_FUNCTION_ARG
407 #define TARGET_FUNCTION_ARG arm_function_arg
408 #undef TARGET_FUNCTION_ARG_ADVANCE
409 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
410
411 #undef TARGET_SETUP_INCOMING_VARARGS
412 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
413
414 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
415 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
416
417 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
418 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
419 #undef TARGET_TRAMPOLINE_INIT
420 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
421 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
422 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
423
424 #undef TARGET_DEFAULT_SHORT_ENUMS
425 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
426
427 #undef TARGET_ALIGN_ANON_BITFIELD
428 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
429
430 #undef TARGET_NARROW_VOLATILE_BITFIELD
431 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
432
433 #undef TARGET_CXX_GUARD_TYPE
434 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
435
436 #undef TARGET_CXX_GUARD_MASK_BIT
437 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
438
439 #undef TARGET_CXX_GET_COOKIE_SIZE
440 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
441
442 #undef TARGET_CXX_COOKIE_HAS_SIZE
443 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
444
445 #undef TARGET_CXX_CDTOR_RETURNS_THIS
446 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
447
448 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
449 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
450
451 #undef TARGET_CXX_USE_AEABI_ATEXIT
452 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
453
454 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
455 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
456 arm_cxx_determine_class_data_visibility
457
458 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
459 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
460
461 #undef TARGET_RETURN_IN_MSB
462 #define TARGET_RETURN_IN_MSB arm_return_in_msb
463
464 #undef TARGET_RETURN_IN_MEMORY
465 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
466
467 #undef TARGET_MUST_PASS_IN_STACK
468 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
469
470 #if ARM_UNWIND_INFO
471 #undef TARGET_ASM_UNWIND_EMIT
472 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
473
474 /* EABI unwinding tables use a different format for the typeinfo tables. */
475 #undef TARGET_ASM_TTYPE
476 #define TARGET_ASM_TTYPE arm_output_ttype
477
478 #undef TARGET_ARM_EABI_UNWINDER
479 #define TARGET_ARM_EABI_UNWINDER true
480
481 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
482 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
483
484 #undef TARGET_ASM_INIT_SECTIONS
485 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
486 #endif /* ARM_UNWIND_INFO */
487
488 #undef TARGET_EXCEPT_UNWIND_INFO
489 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
490
491 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
492 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
493
494 #undef TARGET_DWARF_REGISTER_SPAN
495 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
496
497 #undef TARGET_CANNOT_COPY_INSN_P
498 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
499
500 #ifdef HAVE_AS_TLS
501 #undef TARGET_HAVE_TLS
502 #define TARGET_HAVE_TLS true
503 #endif
504
505 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
506 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
507
508 #undef TARGET_CANNOT_FORCE_CONST_MEM
509 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
510
511 #undef TARGET_MAX_ANCHOR_OFFSET
512 #define TARGET_MAX_ANCHOR_OFFSET 4095
513
514 /* The minimum is set such that the total size of the block
515 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
516 divisible by eight, ensuring natural spacing of anchors. */
517 #undef TARGET_MIN_ANCHOR_OFFSET
518 #define TARGET_MIN_ANCHOR_OFFSET -4088
519
520 #undef TARGET_SCHED_ISSUE_RATE
521 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
522
523 #undef TARGET_MANGLE_TYPE
524 #define TARGET_MANGLE_TYPE arm_mangle_type
525
526 #undef TARGET_BUILD_BUILTIN_VA_LIST
527 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
528 #undef TARGET_EXPAND_BUILTIN_VA_START
529 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
530 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
531 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
532
533 #ifdef HAVE_AS_TLS
534 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
535 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
536 #endif
537
538 #undef TARGET_LEGITIMATE_ADDRESS_P
539 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
540
541 #undef TARGET_INVALID_PARAMETER_TYPE
542 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
543
544 #undef TARGET_INVALID_RETURN_TYPE
545 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
546
547 #undef TARGET_PROMOTED_TYPE
548 #define TARGET_PROMOTED_TYPE arm_promoted_type
549
550 #undef TARGET_CONVERT_TO_TYPE
551 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
552
553 #undef TARGET_SCALAR_MODE_SUPPORTED_P
554 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
555
556 #undef TARGET_FRAME_POINTER_REQUIRED
557 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
558
559 #undef TARGET_CAN_ELIMINATE
560 #define TARGET_CAN_ELIMINATE arm_can_eliminate
561
562 #undef TARGET_CLASS_LIKELY_SPILLED_P
563 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
564
565 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
566 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
567 arm_vector_alignment_reachable
568
569 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
570 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
571 arm_builtin_support_vector_misalignment
572
573 struct gcc_target targetm = TARGET_INITIALIZER;
574 \f
575 /* Obstack for minipool constant handling. */
576 static struct obstack minipool_obstack;
577 static char * minipool_startobj;
578
579 /* The maximum number of insns skipped which
580 will be conditionalised if possible. */
581 static int max_insns_skipped = 5;
582
583 extern FILE * asm_out_file;
584
585 /* True if we are currently building a constant table. */
586 int making_const_table;
587
588 /* The processor for which instructions should be scheduled. */
589 enum processor_type arm_tune = arm_none;
590
591 /* The current tuning set. */
592 const struct tune_params *current_tune;
593
594 /* Which floating point hardware to schedule for. */
595 int arm_fpu_attr;
596
597 /* Which floating point hardware to use. */
598 const struct arm_fpu_desc *arm_fpu_desc;
599
600 /* Whether to use floating point hardware. */
601 enum float_abi_type arm_float_abi;
602
603 /* Which __fp16 format to use. */
604 enum arm_fp16_format_type arm_fp16_format;
605
606 /* Which ABI to use. */
607 enum arm_abi_type arm_abi;
608
609 /* Which thread pointer model to use. */
610 enum arm_tp_type target_thread_pointer = TP_AUTO;
611
612 /* Used to parse -mstructure_size_boundary command line option. */
613 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
614
615 /* Used for Thumb call_via trampolines. */
616 rtx thumb_call_via_label[14];
617 static int thumb_call_reg_needed;
618
619 /* Bit values used to identify processor capabilities. */
620 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
621 #define FL_ARCH3M (1 << 1) /* Extended multiply */
622 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
623 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
624 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
625 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
626 #define FL_THUMB (1 << 6) /* Thumb aware */
627 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
628 #define FL_STRONG (1 << 8) /* StrongARM */
629 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
630 #define FL_XSCALE (1 << 10) /* XScale */
631 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
632 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
633 media instructions. */
634 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
635 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
636 Note: ARM6 & 7 derivatives only. */
637 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
638 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
639 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
640 profile. */
641 #define FL_DIV (1 << 18) /* Hardware divide. */
642 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
643 #define FL_NEON (1 << 20) /* Neon instructions. */
644 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
645 architecture. */
646 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
647
648 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
649
650 /* Flags that only affect tuning, not available instructions. */
651 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
652 | FL_CO_PROC)
653
654 #define FL_FOR_ARCH2 FL_NOTM
655 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
656 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
657 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
658 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
659 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
660 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
661 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
662 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
663 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
664 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
665 #define FL_FOR_ARCH6J FL_FOR_ARCH6
666 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
667 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
668 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
669 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
670 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
671 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
672 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
673 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
674 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
675 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
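/* Worked example (illustrative) of how the chain above composes:

     FL_FOR_ARCH7M = FL_FOR_ARCH7 | FL_DIV
                   = ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) | FL_DIV

   i.e. v7-M starts from the v6T2 feature set, drops the instructions
   reserved for non-M profiles (FL_NOTM), then adds FL_ARCH7 and the
   hardware divide bit.  */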
676
677 /* The bits in this mask specify which
678 instructions we are allowed to generate. */
679 static unsigned long insn_flags = 0;
680
681 /* The bits in this mask specify which instruction scheduling options should
682 be used. */
683 static unsigned long tune_flags = 0;
684
685 /* The following are used in the arm.md file as equivalents to bits
686 in the above two flag variables. */
687
688 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
689 int arm_arch3m = 0;
690
691 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
692 int arm_arch4 = 0;
693
694 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
695 int arm_arch4t = 0;
696
697 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
698 int arm_arch5 = 0;
699
700 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
701 int arm_arch5e = 0;
702
703 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
704 int arm_arch6 = 0;
705
706 /* Nonzero if this chip supports the ARM 6K extensions. */
707 int arm_arch6k = 0;
708
709 /* Nonzero if this chip supports the ARM 7 extensions. */
710 int arm_arch7 = 0;
711
712 /* Nonzero if instructions not present in the 'M' profile can be used. */
713 int arm_arch_notm = 0;
714
715 /* Nonzero if instructions present in ARMv7E-M can be used. */
716 int arm_arch7em = 0;
717
718 /* Nonzero if this chip can benefit from load scheduling. */
719 int arm_ld_sched = 0;
720
721 /* Nonzero if this chip is a StrongARM. */
722 int arm_tune_strongarm = 0;
723
724 /* Nonzero if this chip is a Cirrus variant. */
725 int arm_arch_cirrus = 0;
726
727 /* Nonzero if this chip supports Intel Wireless MMX technology. */
728 int arm_arch_iwmmxt = 0;
729
730 /* Nonzero if this chip is an XScale. */
731 int arm_arch_xscale = 0;
732
733 /* Nonzero if tuning for XScale */
734 int arm_tune_xscale = 0;
735
736 /* Nonzero if we want to tune for stores that access the write-buffer.
737 This typically means an ARM6 or ARM7 with MMU or MPU. */
738 int arm_tune_wbuf = 0;
739
740 /* Nonzero if tuning for Cortex-A9. */
741 int arm_tune_cortex_a9 = 0;
742
743 /* Nonzero if generating Thumb instructions. */
744 int thumb_code = 0;
745
746 /* Nonzero if generating Thumb-1 instructions. */
747 int thumb1_code = 0;
748
749 /* Nonzero if we should define __THUMB_INTERWORK__ in the
750 preprocessor.
751 XXX This is a bit of a hack, it's intended to help work around
752 problems in GLD which doesn't understand that armv5t code is
753 interworking clean. */
754 int arm_cpp_interwork = 0;
755
756 /* Nonzero if chip supports Thumb 2. */
757 int arm_arch_thumb2;
758
759 /* Nonzero if chip supports integer division instruction. */
760 int arm_arch_hwdiv;
761
762 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
763 we must report the mode of the memory reference from
764 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
765 enum machine_mode output_memory_reference_mode;
766
767 /* The register number to be used for the PIC offset register. */
768 unsigned arm_pic_register = INVALID_REGNUM;
769
770 /* Set to 1 after arm_reorg has started. Reset at the start of
771 the next function. */
772 static int after_arm_reorg = 0;
773
774 enum arm_pcs arm_pcs_default;
775
776 /* For an explanation of these variables, see final_prescan_insn below. */
777 int arm_ccfsm_state;
778 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
779 enum arm_cond_code arm_current_cc;
780
781 rtx arm_target_insn;
782 int arm_target_label;
783 /* The number of conditionally executed insns, including the current insn. */
784 int arm_condexec_count = 0;
785 /* A bitmask specifying the patterns for the IT block.
786 Zero means do not output an IT block before this insn. */
787 int arm_condexec_mask = 0;
788 /* The number of bits used in arm_condexec_mask. */
789 int arm_condexec_masklen = 0;
790
791 /* The condition codes of the ARM, and the inverse function. */
792 static const char * const arm_condition_codes[] =
793 {
794 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
795 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
796 };
797
798 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
799 int arm_regs_in_sequence[] =
800 {
801 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
802 };
803
804 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
805 #define streq(string1, string2) (strcmp (string1, string2) == 0)
806
807 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
808 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
809 | (1 << PIC_OFFSET_TABLE_REGNUM)))
810 \f
811 /* Initialization code. */
812
813 struct processors
814 {
815 const char *const name;
816 enum processor_type core;
817 const char *arch;
818 const unsigned long flags;
819 const struct tune_params *const tune;
820 };
821
822 const struct tune_params arm_slowmul_tune =
823 {
824 arm_slowmul_rtx_costs,
825 NULL,
826 3
827 };
828
829 const struct tune_params arm_fastmul_tune =
830 {
831 arm_fastmul_rtx_costs,
832 NULL,
833 1
834 };
835
836 const struct tune_params arm_xscale_tune =
837 {
838 arm_xscale_rtx_costs,
839 xscale_sched_adjust_cost,
840 2
841 };
842
843 const struct tune_params arm_9e_tune =
844 {
845 arm_9e_rtx_costs,
846 NULL,
847 1
848 };
849
850 const struct tune_params arm_cortex_a9_tune =
851 {
852 arm_9e_rtx_costs,
853 cortex_a9_sched_adjust_cost,
854 1
855 };
856
857
858 /* Not all of these give usefully different compilation alternatives,
859 but there is no simple way of generalizing them. */
860 static const struct processors all_cores[] =
861 {
862 /* ARM Cores */
863 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
864 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
865 #include "arm-cores.def"
866 #undef ARM_CORE
867 {NULL, arm_none, NULL, 0, NULL}
868 };
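/* Illustrative sketch of how a single arm-cores.def entry expands through
   the ARM_CORE macro above.  The entry shown is hypothetical (the real
   list lives in arm-cores.def):

     ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand to

     {"cortex-a9", cortexa9, "7A", FL_LDSCHED | FL_FOR_ARCH7A,
      &arm_cortex_a9_tune},

   so the ARCH argument both supplies the architecture string and selects
   the FL_FOR_ARCH* flag set, while COSTS picks one of the arm_*_tune
   structures defined earlier.  */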
869
870 static const struct processors all_architectures[] =
871 {
872 /* ARM Architectures */
873 /* We don't specify tuning costs here as it will be figured out
874 from the core. */
875
876 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
877 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
878 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
879 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
880 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
881 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
882 implementations that support it, so we will leave it out for now. */
883 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
884 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
885 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
886 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
887 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
888 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
889 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
890 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
891 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
892 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
893 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
894 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
895 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
896 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
897 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
898 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
899 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
900 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
901 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
902 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
903 {NULL, arm_none, NULL, 0 , NULL}
904 };
905
906
907 /* These are populated as command line arguments are processed, or NULL
908 if not specified. */
909 static const struct processors *arm_selected_arch;
910 static const struct processors *arm_selected_cpu;
911 static const struct processors *arm_selected_tune;
912
913 /* The name of the preprocessor macro to define for this architecture. */
914
915 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
916
917 /* Available values for -mfpu=. */
918
919 static const struct arm_fpu_desc all_fpus[] =
920 {
921 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
922 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
923 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
924 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
925 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
926 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
927 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
928 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
929 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
930 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
931 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
932 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
933 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
934 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
935 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
936 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
937 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
938 /* Compatibility aliases. */
939 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
940 };
941
942
943 struct float_abi
944 {
945 const char * name;
946 enum float_abi_type abi_type;
947 };
948
949
950 /* Available values for -mfloat-abi=. */
951
952 static const struct float_abi all_float_abis[] =
953 {
954 {"soft", ARM_FLOAT_ABI_SOFT},
955 {"softfp", ARM_FLOAT_ABI_SOFTFP},
956 {"hard", ARM_FLOAT_ABI_HARD}
957 };
958
959
960 struct fp16_format
961 {
962 const char *name;
963 enum arm_fp16_format_type fp16_format_type;
964 };
965
966
967 /* Available values for -mfp16-format=. */
968
969 static const struct fp16_format all_fp16_formats[] =
970 {
971 {"none", ARM_FP16_FORMAT_NONE},
972 {"ieee", ARM_FP16_FORMAT_IEEE},
973 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
974 };
975
976
977 struct abi_name
978 {
979 const char *name;
980 enum arm_abi_type abi_type;
981 };
982
983
984 /* Available values for -mabi=. */
985
986 static const struct abi_name arm_all_abis[] =
987 {
988 {"apcs-gnu", ARM_ABI_APCS},
989 {"atpcs", ARM_ABI_ATPCS},
990 {"aapcs", ARM_ABI_AAPCS},
991 {"iwmmxt", ARM_ABI_IWMMXT},
992 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
993 };
994
995 /* Supported TLS relocations. */
996
997 enum tls_reloc {
998 TLS_GD32,
999 TLS_LDM32,
1000 TLS_LDO32,
1001 TLS_IE32,
1002 TLS_LE32
1003 };
1004
1005 /* The maximum number of insns to be used when loading a constant. */
1006 inline static int
1007 arm_constant_limit (bool size_p)
1008 {
1009 return size_p ? 1 : current_tune->constant_limit;
1010 }
1011
1012 /* Emit an insn that's a simple single-set. Both the operands must be known
1013 to be valid. */
1014 inline static rtx
1015 emit_set_insn (rtx x, rtx y)
1016 {
1017 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1018 }
1019
1020 /* Return the number of bits set in VALUE. */
1021 static unsigned
1022 bit_count (unsigned long value)
1023 {
1024 unsigned long count = 0;
1025
1026 while (value)
1027 {
1028 count++;
1029 value &= value - 1; /* Clear the least-significant set bit. */
1030 }
1031
1032 return count;
1033 }
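/* Worked example (illustrative) of the loop above: for value == 0x29
   (binary 101001) the iterations compute
     0x29 & 0x28 = 0x28,   0x28 & 0x27 = 0x20,   0x20 & 0x1f = 0x00,
   so bit_count (0x29) == 3.  Each "value &= value - 1" clears exactly one
   set bit, hence the loop runs once per bit set in VALUE.  */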
1034
1035 /* Set up library functions unique to ARM. */
1036
1037 static void
1038 arm_init_libfuncs (void)
1039 {
1040 /* There are no special library functions unless we are using the
1041 ARM BPABI. */
1042 if (!TARGET_BPABI)
1043 return;
1044
1045 /* The functions below are described in Section 4 of the "Run-Time
1046 ABI for the ARM architecture", Version 1.0. */
1047
1048 /* Double-precision floating-point arithmetic. Table 2. */
1049 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1050 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1051 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1052 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1053 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1054
1055 /* Double-precision comparisons. Table 3. */
1056 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1057 set_optab_libfunc (ne_optab, DFmode, NULL);
1058 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1059 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1060 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1061 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1062 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1063
1064 /* Single-precision floating-point arithmetic. Table 4. */
1065 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1066 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1067 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1068 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1069 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1070
1071 /* Single-precision comparisons. Table 5. */
1072 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1073 set_optab_libfunc (ne_optab, SFmode, NULL);
1074 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1075 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1076 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1077 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1078 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1079
1080 /* Floating-point to integer conversions. Table 6. */
1081 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1082 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1083 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1084 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1085 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1086 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1087 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1088 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1089
1090 /* Conversions between floating types. Table 7. */
1091 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1092 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1093
1094 /* Integer to floating-point conversions. Table 8. */
1095 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1096 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1097 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1098 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1099 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1100 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1101 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1102 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1103
1104 /* Long long. Table 9. */
1105 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1106 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1107 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1108 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1109 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1110 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1111 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1112 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1113
1114 /* Integer (32/32->32) division. \S 4.3.1. */
1115 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1116 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1117
1118 /* The divmod functions are designed so that they can be used for
1119 plain division, even though they return both the quotient and the
1120 remainder. The quotient is returned in the usual location (i.e.,
1121 r0 for SImode, {r0, r1} for DImode), just as would be expected
1122 for an ordinary division routine. Because the AAPCS calling
1123 conventions specify that all of { r0, r1, r2, r3 } are
1124 call-clobbered registers, there is no need to tell the compiler
1125 explicitly that those registers are clobbered by these
1126 routines. */
1127 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1128 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
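/* Illustrative sketch (not part of the build) of the register contract the
   comment above relies on.  For SImode the divmod entry point behaves as

     __aeabi_idivmod:  r0 = n, r1 = d   ->   r0 = n / d, r1 = n % d

   and for DImode the quotient comes back in {r0, r1}, so a plain division
   can call the divmod routine and simply ignore the remainder registers.  */
#if 0
/* Hypothetical user code: with sdiv_optab for DImode pointing at
   "__aeabi_ldivmod" as above, this division becomes a single libcall and
   only the quotient half of the result is used.  */
long long
scale (long long total, long long parts)
{
  return total / parts;
}
#endif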
1129
1130 /* For SImode division the ABI provides div-without-mod routines,
1131 which are faster. */
1132 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1133 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1134
1135 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1136 divmod libcalls instead. */
1137 set_optab_libfunc (smod_optab, DImode, NULL);
1138 set_optab_libfunc (umod_optab, DImode, NULL);
1139 set_optab_libfunc (smod_optab, SImode, NULL);
1140 set_optab_libfunc (umod_optab, SImode, NULL);
1141
1142 /* Half-precision float operations. The compiler handles all operations
1143 with NULL libfuncs by converting to SFmode. */
1144 switch (arm_fp16_format)
1145 {
1146 case ARM_FP16_FORMAT_IEEE:
1147 case ARM_FP16_FORMAT_ALTERNATIVE:
1148
1149 /* Conversions. */
1150 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1151 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1152 ? "__gnu_f2h_ieee"
1153 : "__gnu_f2h_alternative"));
1154 set_conv_libfunc (sext_optab, SFmode, HFmode,
1155 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1156 ? "__gnu_h2f_ieee"
1157 : "__gnu_h2f_alternative"));
1158
1159 /* Arithmetic. */
1160 set_optab_libfunc (add_optab, HFmode, NULL);
1161 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1162 set_optab_libfunc (smul_optab, HFmode, NULL);
1163 set_optab_libfunc (neg_optab, HFmode, NULL);
1164 set_optab_libfunc (sub_optab, HFmode, NULL);
1165
1166 /* Comparisons. */
1167 set_optab_libfunc (eq_optab, HFmode, NULL);
1168 set_optab_libfunc (ne_optab, HFmode, NULL);
1169 set_optab_libfunc (lt_optab, HFmode, NULL);
1170 set_optab_libfunc (le_optab, HFmode, NULL);
1171 set_optab_libfunc (ge_optab, HFmode, NULL);
1172 set_optab_libfunc (gt_optab, HFmode, NULL);
1173 set_optab_libfunc (unord_optab, HFmode, NULL);
1174 break;
1175
1176 default:
1177 break;
1178 }
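/* Illustrative sketch (not part of the build) of what leaving the HFmode
   arithmetic and comparison libfuncs NULL above means in practice.  With
   -mfp16-format=ieee, hypothetical user code such as

     __fp16 a, b, c;
     void accumulate (void) { c = a + b; }

   is compiled roughly as

     c = (__fp16) ((float) a + (float) b);

   so the only HFmode libcalls ever emitted are the conversion helpers
   registered above (__gnu_h2f_ieee / __gnu_f2h_ieee, or the _alternative
   pair).  */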
1179
1180 if (TARGET_AAPCS_BASED)
1181 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1182 }
1183
1184 /* On AAPCS systems, this is the "struct __va_list". */
1185 static GTY(()) tree va_list_type;
1186
1187 /* Return the type to use as __builtin_va_list. */
1188 static tree
1189 arm_build_builtin_va_list (void)
1190 {
1191 tree va_list_name;
1192 tree ap_field;
1193
1194 if (!TARGET_AAPCS_BASED)
1195 return std_build_builtin_va_list ();
1196
1197 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1198 defined as:
1199
1200 struct __va_list
1201 {
1202 void *__ap;
1203 };
1204
1205 The C Library ABI further reinforces this definition in \S
1206 4.1.
1207
1208 We must follow this definition exactly. The structure tag
1209 name is visible in C++ mangled names, and thus forms a part
1210 of the ABI. The field name may be used by people who
1211 #include <stdarg.h>. */
1212 /* Create the type. */
1213 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1214 /* Give it the required name. */
1215 va_list_name = build_decl (BUILTINS_LOCATION,
1216 TYPE_DECL,
1217 get_identifier ("__va_list"),
1218 va_list_type);
1219 DECL_ARTIFICIAL (va_list_name) = 1;
1220 TYPE_NAME (va_list_type) = va_list_name;
1221 /* Create the __ap field. */
1222 ap_field = build_decl (BUILTINS_LOCATION,
1223 FIELD_DECL,
1224 get_identifier ("__ap"),
1225 ptr_type_node);
1226 DECL_ARTIFICIAL (ap_field) = 1;
1227 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1228 TYPE_FIELDS (va_list_type) = ap_field;
1229 /* Compute its layout. */
1230 layout_type (va_list_type);
1231
1232 return va_list_type;
1233 }
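/* Illustrative sketch (not part of the build): seen from user code on an
   AAPCS target, the type built above behaves as if <stdarg.h> contained

     typedef struct __va_list { void *__ap; } va_list;

   The "__va_list" tag is the part that shows up in C++ mangled names,
   which is why the function above must use exactly that spelling.  */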
1234
1235 /* Return an expression of type "void *" pointing to the next
1236 available argument in a variable-argument list. VALIST is the
1237 user-level va_list object, of type __builtin_va_list. */
1238 static tree
1239 arm_extract_valist_ptr (tree valist)
1240 {
1241 if (TREE_TYPE (valist) == error_mark_node)
1242 return error_mark_node;
1243
1244 /* On an AAPCS target, the pointer is stored within "struct
1245 va_list". */
1246 if (TARGET_AAPCS_BASED)
1247 {
1248 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1249 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1250 valist, ap_field, NULL_TREE);
1251 }
1252
1253 return valist;
1254 }
1255
1256 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1257 static void
1258 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1259 {
1260 valist = arm_extract_valist_ptr (valist);
1261 std_expand_builtin_va_start (valist, nextarg);
1262 }
1263
1264 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1265 static tree
1266 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1267 gimple_seq *post_p)
1268 {
1269 valist = arm_extract_valist_ptr (valist);
1270 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1271 }
1272
1273 /* Lookup NAME in SEL. */
1274
1275 static const struct processors *
1276 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1277 {
1278 if (!(name && *name))
1279 return NULL;
1280
1281 for (; sel->name != NULL; sel++)
1282 {
1283 if (streq (name, sel->name))
1284 return sel;
1285 }
1286
1287 error ("bad value (%s) for %s switch", name, desc);
1288 return NULL;
1289 }
1290
1291 /* Implement TARGET_HANDLE_OPTION. */
1292
1293 static bool
1294 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1295 {
1296 switch (code)
1297 {
1298 case OPT_march_:
1299 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1300 return true;
1301
1302 case OPT_mcpu_:
1303 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1304 return true;
1305
1306 case OPT_mhard_float:
1307 target_float_abi_name = "hard";
1308 return true;
1309
1310 case OPT_msoft_float:
1311 target_float_abi_name = "soft";
1312 return true;
1313
1314 case OPT_mtune_:
1315 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1316 return true;
1317
1318 default:
1319 return true;
1320 }
1321 }
1322
1323 static void
1324 arm_target_help (void)
1325 {
1326 int i;
1327 static int columns = 0;
1328 int remaining;
1329
1330 /* If we have not done so already, obtain the desired maximum width of
1331 the output. Note - this is a duplication of the code at the start of
1332 gcc/opts.c:print_specific_help() - the two copies should probably be
1333 replaced by a single function. */
1334 if (columns == 0)
1335 {
1336 const char *p;
1337
1338 GET_ENVIRONMENT (p, "COLUMNS");
1339 if (p != NULL)
1340 {
1341 int value = atoi (p);
1342
1343 if (value > 0)
1344 columns = value;
1345 }
1346
1347 if (columns == 0)
1348 /* Use a reasonable default. */
1349 columns = 80;
1350 }
1351
1352 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1353
1354 /* The - 2 is because we know that the last entry in the array is NULL. */
1355 i = ARRAY_SIZE (all_cores) - 2;
1356 gcc_assert (i > 0);
1357 printf (" %s", all_cores[i].name);
1358 remaining = columns - (strlen (all_cores[i].name) + 4);
1359 gcc_assert (remaining >= 0);
1360
1361 while (i--)
1362 {
1363 int len = strlen (all_cores[i].name);
1364
1365 if (remaining > len + 2)
1366 {
1367 printf (", %s", all_cores[i].name);
1368 remaining -= len + 2;
1369 }
1370 else
1371 {
1372 if (remaining > 0)
1373 printf (",");
1374 printf ("\n %s", all_cores[i].name);
1375 remaining = columns - (len + 4);
1376 }
1377 }
1378
1379 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1380
1381 i = ARRAY_SIZE (all_architectures) - 2;
1382 gcc_assert (i > 0);
1383
1384 printf (" %s", all_architectures[i].name);
1385 remaining = columns - (strlen (all_architectures[i].name) + 4);
1386 gcc_assert (remaining >= 0);
1387
1388 while (i--)
1389 {
1390 int len = strlen (all_architectures[i].name);
1391
1392 if (remaining > len + 2)
1393 {
1394 printf (", %s", all_architectures[i].name);
1395 remaining -= len + 2;
1396 }
1397 else
1398 {
1399 if (remaining > 0)
1400 printf (",");
1401 printf ("\n %s", all_architectures[i].name);
1402 remaining = columns - (len + 4);
1403 }
1404 }
1405 printf ("\n");
1406
1407 }
1408
1409 /* Fix up any incompatible options that the user has specified. */
1410 static void
1411 arm_option_override (void)
1412 {
1413 unsigned i;
1414
1415 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1416 SUBTARGET_OVERRIDE_OPTIONS;
1417 #endif
1418
1419 if (arm_selected_arch)
1420 {
1421 if (arm_selected_cpu)
1422 {
1423 /* Check for conflict between mcpu and march. */
1424 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1425 {
1426 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1427 arm_selected_cpu->name, arm_selected_arch->name);
1428 /* -march wins for code generation.
1429 -mcpu wins for default tuning. */
1430 if (!arm_selected_tune)
1431 arm_selected_tune = arm_selected_cpu;
1432
1433 arm_selected_cpu = arm_selected_arch;
1434 }
1435 else
1436 /* -mcpu wins. */
1437 arm_selected_arch = NULL;
1438 }
1439 else
1440 /* Pick a CPU based on the architecture. */
1441 arm_selected_cpu = arm_selected_arch;
1442 }
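/* Illustrative sketch (not part of the build) of the resolution above for
   a hypothetical command line that names conflicting selections:

     -mcpu=arm7tdmi -march=armv7-a
       -> "switch -mcpu=arm7tdmi conflicts with -march=armv7-a switch"
       -> code generation follows the -march entry (arm_selected_cpu is
          replaced by arm_selected_arch), while instruction tuning keeps
          the -mcpu entry via arm_selected_tune.

   Bits in FL_TUNE are masked out of the comparison, so selections that
   differ only in tuning-related flags do not trigger the warning.  */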
1443
1444 /* If the user did not specify a processor, choose one for them. */
1445 if (!arm_selected_cpu)
1446 {
1447 const struct processors * sel;
1448 unsigned int sought;
1449
1450 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1451 if (!arm_selected_cpu->name)
1452 {
1453 #ifdef SUBTARGET_CPU_DEFAULT
1454 /* Use the subtarget default CPU if none was specified by
1455 configure. */
1456 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1457 #endif
1458 /* Default to ARM6. */
1459 if (!arm_selected_cpu->name)
1460 arm_selected_cpu = &all_cores[arm6];
1461 }
1462
1463 sel = arm_selected_cpu;
1464 insn_flags = sel->flags;
1465
1466 /* Now check to see if the user has specified some command line
1467 switches that require certain abilities from the cpu. */
1468 sought = 0;
1469
1470 if (TARGET_INTERWORK || TARGET_THUMB)
1471 {
1472 sought |= (FL_THUMB | FL_MODE32);
1473
1474 /* There are no ARM processors that support both APCS-26 and
1475 interworking. Therefore we force FL_MODE26 to be removed
1476 from insn_flags here (if it was set), so that the search
1477 below will always be able to find a compatible processor. */
1478 insn_flags &= ~FL_MODE26;
1479 }
1480
1481 if (sought != 0 && ((sought & insn_flags) != sought))
1482 {
1483 /* Try to locate a CPU type that supports all of the abilities
1484 of the default CPU, plus the extra abilities requested by
1485 the user. */
1486 for (sel = all_cores; sel->name != NULL; sel++)
1487 if ((sel->flags & sought) == (sought | insn_flags))
1488 break;
1489
1490 if (sel->name == NULL)
1491 {
1492 unsigned current_bit_count = 0;
1493 const struct processors * best_fit = NULL;
1494
1495 /* Ideally we would like to issue an error message here
1496 saying that it was not possible to find a CPU compatible
1497 with the default CPU, but which also supports the command
1498 line options specified by the programmer, and so they
1499 ought to use the -mcpu=<name> command line option to
1500 override the default CPU type.
1501
1502 If we cannot find a cpu that has both the
1503 characteristics of the default cpu and the given
1504 command line options we scan the array again looking
1505 for a best match. */
1506 for (sel = all_cores; sel->name != NULL; sel++)
1507 if ((sel->flags & sought) == sought)
1508 {
1509 unsigned count;
1510
1511 count = bit_count (sel->flags & insn_flags);
1512
1513 if (count >= current_bit_count)
1514 {
1515 best_fit = sel;
1516 current_bit_count = count;
1517 }
1518 }
1519
1520 gcc_assert (best_fit);
1521 sel = best_fit;
1522 }
1523
1524 arm_selected_cpu = sel;
1525 }
1526 }
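/* Illustrative sketch (not part of the build) of the fallback above: if
   the configured default CPU lacks Thumb support and -mthumb was given,
   "sought" holds FL_THUMB | FL_MODE32; when no core offers both the
   default CPU's abilities and the sought ones, the second scan settles on
   the Thumb-capable core that shares the most capability bits with the
   default, as measured by bit_count ().  */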
1527
1528 gcc_assert (arm_selected_cpu);
1529 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1530 if (!arm_selected_tune)
1531 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1532
1533 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1534 insn_flags = arm_selected_cpu->flags;
1535
1536 arm_tune = arm_selected_tune->core;
1537 tune_flags = arm_selected_tune->flags;
1538 current_tune = arm_selected_tune->tune;
1539
1540 if (target_fp16_format_name)
1541 {
1542 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1543 {
1544 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1545 {
1546 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1547 break;
1548 }
1549 }
1550 if (i == ARRAY_SIZE (all_fp16_formats))
1551 error ("invalid __fp16 format option: -mfp16-format=%s",
1552 target_fp16_format_name);
1553 }
1554 else
1555 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1556
1557 if (target_abi_name)
1558 {
1559 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1560 {
1561 if (streq (arm_all_abis[i].name, target_abi_name))
1562 {
1563 arm_abi = arm_all_abis[i].abi_type;
1564 break;
1565 }
1566 }
1567 if (i == ARRAY_SIZE (arm_all_abis))
1568 error ("invalid ABI option: -mabi=%s", target_abi_name);
1569 }
1570 else
1571 arm_abi = ARM_DEFAULT_ABI;
1572
1573 /* Make sure that the processor choice does not conflict with any of the
1574 other command line choices. */
1575 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1576 error ("target CPU does not support ARM mode");
1577
1578 /* BPABI targets use linker tricks to allow interworking on cores
1579 without thumb support. */
1580 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1581 {
1582 warning (0, "target CPU does not support interworking");
1583 target_flags &= ~MASK_INTERWORK;
1584 }
1585
1586 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1587 {
1588 warning (0, "target CPU does not support THUMB instructions");
1589 target_flags &= ~MASK_THUMB;
1590 }
1591
1592 if (TARGET_APCS_FRAME && TARGET_THUMB)
1593 {
1594 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1595 target_flags &= ~MASK_APCS_FRAME;
1596 }
1597
1598 /* Callee super interworking implies thumb interworking. Adding
1599 this to the flags here simplifies the logic elsewhere. */
1600 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1601 target_flags |= MASK_INTERWORK;
1602
1603 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1604 from here where no function is being compiled currently. */
1605 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1606 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1607
1608 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1609 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1610
1611 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1612 {
1613 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1614 target_flags |= MASK_APCS_FRAME;
1615 }
1616
1617 if (TARGET_POKE_FUNCTION_NAME)
1618 target_flags |= MASK_APCS_FRAME;
1619
1620 if (TARGET_APCS_REENT && flag_pic)
1621 error ("-fpic and -mapcs-reent are incompatible");
1622
1623 if (TARGET_APCS_REENT)
1624 warning (0, "APCS reentrant code not supported. Ignored");
1625
1626 /* If this target is normally configured to use APCS frames, warn if they
1627 are turned off and debugging is turned on. */
1628 if (TARGET_ARM
1629 && write_symbols != NO_DEBUG
1630 && !TARGET_APCS_FRAME
1631 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1632 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1633
1634 if (TARGET_APCS_FLOAT)
1635 warning (0, "passing floating point arguments in fp regs not yet supported");
1636
1637 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1638 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1639 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1640 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1641 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1642 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1643 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1644 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1645 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1646 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1647 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1648 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1649 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1650 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1651
1652 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1653 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1654 thumb_code = TARGET_ARM == 0;
1655 thumb1_code = TARGET_THUMB1 != 0;
1656 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1657 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1658 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1659 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1660 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1661
1662 /* If we are not using the default (ARM mode) section anchor offset
1663 ranges, then set the correct ranges now. */
1664 if (TARGET_THUMB1)
1665 {
1666 /* Thumb-1 LDR instructions cannot have negative offsets.
1667 Permissible positive offset ranges are 5-bit (for byte loads),
1668 6-bit (for halfword loads), or 7-bit (for word loads).
1669 Empirical results suggest a 7-bit anchor range gives the best
1670 overall code size. */
1671 targetm.min_anchor_offset = 0;
1672 targetm.max_anchor_offset = 127;
1673 }
1674 else if (TARGET_THUMB2)
1675 {
1676 /* The minimum is set such that the total size of the block
1677 for a particular anchor is 248 + 1 + 4095 bytes, which is
1678 divisible by eight, ensuring natural spacing of anchors. */
1679 targetm.min_anchor_offset = -248;
1680 targetm.max_anchor_offset = 4095;
1681 }
1682
1683 /* V5 code we generate is completely interworking capable, so we turn off
1684 TARGET_INTERWORK here to avoid many tests later on. */
1685
1686 /* XXX However, we must pass the right pre-processor defines to CPP
1687 or GLD can get confused. This is a hack. */
1688 if (TARGET_INTERWORK)
1689 arm_cpp_interwork = 1;
1690
1691 if (arm_arch5)
1692 target_flags &= ~MASK_INTERWORK;
1693
1694 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1695 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1696
1697 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1698 error ("iwmmxt abi requires an iwmmxt capable cpu");
1699
1700 if (target_fpu_name == NULL && target_fpe_name != NULL)
1701 {
1702 if (streq (target_fpe_name, "2"))
1703 target_fpu_name = "fpe2";
1704 else if (streq (target_fpe_name, "3"))
1705 target_fpu_name = "fpe3";
1706 else
1707 error ("invalid floating point emulation option: -mfpe=%s",
1708 target_fpe_name);
1709 }
1710
1711 if (target_fpu_name == NULL)
1712 {
1713 #ifdef FPUTYPE_DEFAULT
1714 target_fpu_name = FPUTYPE_DEFAULT;
1715 #else
1716 if (arm_arch_cirrus)
1717 target_fpu_name = "maverick";
1718 else
1719 target_fpu_name = "fpe2";
1720 #endif
1721 }
1722
1723 arm_fpu_desc = NULL;
1724 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1725 {
1726 if (streq (all_fpus[i].name, target_fpu_name))
1727 {
1728 arm_fpu_desc = &all_fpus[i];
1729 break;
1730 }
1731 }
1732
1733 if (!arm_fpu_desc)
1734 {
1735 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1736 return;
1737 }
1738
1739 switch (arm_fpu_desc->model)
1740 {
1741 case ARM_FP_MODEL_FPA:
1742 if (arm_fpu_desc->rev == 2)
1743 arm_fpu_attr = FPU_FPE2;
1744 else if (arm_fpu_desc->rev == 3)
1745 arm_fpu_attr = FPU_FPE3;
1746 else
1747 arm_fpu_attr = FPU_FPA;
1748 break;
1749
1750 case ARM_FP_MODEL_MAVERICK:
1751 arm_fpu_attr = FPU_MAVERICK;
1752 break;
1753
1754 case ARM_FP_MODEL_VFP:
1755 arm_fpu_attr = FPU_VFP;
1756 break;
1757
1758 default:
1759 gcc_unreachable();
1760 }
1761
1762 if (target_float_abi_name != NULL)
1763 {
1764 /* The user specified a FP ABI. */
1765 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1766 {
1767 if (streq (all_float_abis[i].name, target_float_abi_name))
1768 {
1769 arm_float_abi = all_float_abis[i].abi_type;
1770 break;
1771 }
1772 }
1773 if (i == ARRAY_SIZE (all_float_abis))
1774 error ("invalid floating point abi: -mfloat-abi=%s",
1775 target_float_abi_name);
1776 }
1777 else
1778 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1779
1780 if (TARGET_AAPCS_BASED
1781 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1782 error ("FPA is unsupported in the AAPCS");
1783
1784 if (TARGET_AAPCS_BASED)
1785 {
1786 if (TARGET_CALLER_INTERWORKING)
1787 error ("AAPCS does not support -mcaller-super-interworking");
1788 else
1789 if (TARGET_CALLEE_INTERWORKING)
1790 error ("AAPCS does not support -mcallee-super-interworking");
1791 }
1792
1793 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1794 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1795 will ever exist. GCC makes no attempt to support this combination. */
1796 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1797 sorry ("iWMMXt and hardware floating point");
1798
1799 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1800 if (TARGET_THUMB2 && TARGET_IWMMXT)
1801 sorry ("Thumb-2 iWMMXt");
1802
1803 /* __fp16 support currently assumes the core has ldrh. */
1804 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1805 sorry ("__fp16 and no ldrh");
1806
1807 /* If soft-float is specified then don't use FPU. */
1808 if (TARGET_SOFT_FLOAT)
1809 arm_fpu_attr = FPU_NONE;
1810
1811 if (TARGET_AAPCS_BASED)
1812 {
1813 if (arm_abi == ARM_ABI_IWMMXT)
1814 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1815 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1816 && TARGET_HARD_FLOAT
1817 && TARGET_VFP)
1818 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1819 else
1820 arm_pcs_default = ARM_PCS_AAPCS;
1821 }
1822 else
1823 {
1824 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1825 sorry ("-mfloat-abi=hard and VFP");
1826
1827 if (arm_abi == ARM_ABI_APCS)
1828 arm_pcs_default = ARM_PCS_APCS;
1829 else
1830 arm_pcs_default = ARM_PCS_ATPCS;
1831 }
1832
1833 /* For arm2/3 there is no need to do any scheduling if there is only
1834 a floating point emulator, or we are doing software floating-point. */
1835 if ((TARGET_SOFT_FLOAT
1836 || (TARGET_FPA && arm_fpu_desc->rev))
1837 && (tune_flags & FL_MODE32) == 0)
1838 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1839
1840 if (target_thread_switch)
1841 {
1842 if (strcmp (target_thread_switch, "soft") == 0)
1843 target_thread_pointer = TP_SOFT;
1844 else if (strcmp (target_thread_switch, "auto") == 0)
1845 target_thread_pointer = TP_AUTO;
1846 else if (strcmp (target_thread_switch, "cp15") == 0)
1847 target_thread_pointer = TP_CP15;
1848 else
1849 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1850 }
1851
1852 /* Use the cp15 method if it is available. */
1853 if (target_thread_pointer == TP_AUTO)
1854 {
1855 if (arm_arch6k && !TARGET_THUMB1)
1856 target_thread_pointer = TP_CP15;
1857 else
1858 target_thread_pointer = TP_SOFT;
1859 }
1860
1861 if (TARGET_HARD_TP && TARGET_THUMB1)
1862 error ("can not use -mtp=cp15 with 16-bit Thumb");
1863
1864 /* Override the default structure alignment for AAPCS ABI. */
1865 if (TARGET_AAPCS_BASED)
1866 arm_structure_size_boundary = 8;
1867
1868 if (structure_size_string != NULL)
1869 {
1870 int size = strtol (structure_size_string, NULL, 0);
1871
1872 if (size == 8 || size == 32
1873 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1874 arm_structure_size_boundary = size;
1875 else
1876 warning (0, "structure size boundary can only be set to %s",
1877 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1878 }
1879
1880 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1881 {
1882 error ("RTP PIC is incompatible with Thumb");
1883 flag_pic = 0;
1884 }
1885
1886 /* If stack checking is disabled, we can use r10 as the PIC register,
1887 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1888 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1889 {
1890 if (TARGET_VXWORKS_RTP)
1891 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1892 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1893 }
1894
1895 if (flag_pic && TARGET_VXWORKS_RTP)
1896 arm_pic_register = 9;
1897
1898 if (arm_pic_register_string != NULL)
1899 {
1900 int pic_register = decode_reg_name (arm_pic_register_string);
1901
1902 if (!flag_pic)
1903 warning (0, "-mpic-register= is useless without -fpic");
1904
1905 /* Prevent the user from choosing an obviously stupid PIC register. */
1906 else if (pic_register < 0 || call_used_regs[pic_register]
1907 || pic_register == HARD_FRAME_POINTER_REGNUM
1908 || pic_register == STACK_POINTER_REGNUM
1909 || pic_register >= PC_REGNUM
1910 || (TARGET_VXWORKS_RTP
1911 && (unsigned int) pic_register != arm_pic_register))
1912 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1913 else
1914 arm_pic_register = pic_register;
1915 }
1916
1917 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1918 if (fix_cm3_ldrd == 2)
1919 {
1920 if (arm_selected_cpu->core == cortexm3)
1921 fix_cm3_ldrd = 1;
1922 else
1923 fix_cm3_ldrd = 0;
1924 }
1925
1926 if (TARGET_THUMB1 && flag_schedule_insns)
1927 {
1928 /* Don't warn since it's on by default in -O2. */
1929 flag_schedule_insns = 0;
1930 }
1931
1932 if (optimize_size)
1933 {
1934 /* If optimizing for size, bump the number of instructions that we
1935 are prepared to conditionally execute (even on a StrongARM). */
1936 max_insns_skipped = 6;
1937 }
1938 else
1939 {
1940 /* StrongARM has early execution of branches, so a sequence
1941 that is worth skipping is shorter. */
1942 if (arm_tune_strongarm)
1943 max_insns_skipped = 3;
1944 }
1945
1946 /* Hot/Cold partitioning is not currently supported, since we can't
1947 handle literal pool placement in that case. */
1948 if (flag_reorder_blocks_and_partition)
1949 {
1950 inform (input_location,
1951 "-freorder-blocks-and-partition not supported on this architecture");
1952 flag_reorder_blocks_and_partition = 0;
1953 flag_reorder_blocks = 1;
1954 }
1955
1956 if (flag_pic)
1957 /* Hoisting PIC address calculations more aggressively provides a small,
1958 but measurable, size reduction for PIC code. Therefore, we decrease
1959 the bar for unrestricted expression hoisting to the cost of PIC address
1960 calculation, which is 2 instructions. */
1961 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2);
1962
1963 /* Register global variables with the garbage collector. */
1964 arm_add_gc_roots ();
1965 }
1966
1967 static void
1968 arm_add_gc_roots (void)
1969 {
1970 gcc_obstack_init(&minipool_obstack);
1971 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1972 }
1973 \f
1974 /* A table of known ARM exception types.
1975 For use with the interrupt function attribute. */
1976
1977 typedef struct
1978 {
1979 const char *const arg;
1980 const unsigned long return_value;
1981 }
1982 isr_attribute_arg;
1983
1984 static const isr_attribute_arg isr_attribute_args [] =
1985 {
1986 { "IRQ", ARM_FT_ISR },
1987 { "irq", ARM_FT_ISR },
1988 { "FIQ", ARM_FT_FIQ },
1989 { "fiq", ARM_FT_FIQ },
1990 { "ABORT", ARM_FT_ISR },
1991 { "abort", ARM_FT_ISR },
1992 { "ABORT", ARM_FT_ISR },
1993 { "abort", ARM_FT_ISR },
1994 { "UNDEF", ARM_FT_EXCEPTION },
1995 { "undef", ARM_FT_EXCEPTION },
1996 { "SWI", ARM_FT_EXCEPTION },
1997 { "swi", ARM_FT_EXCEPTION },
1998 { NULL, ARM_FT_NORMAL }
1999 };
2000
2001 /* Returns the (interrupt) function type of the current
2002 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2003
2004 static unsigned long
2005 arm_isr_value (tree argument)
2006 {
2007 const isr_attribute_arg * ptr;
2008 const char * arg;
2009
2010 if (!arm_arch_notm)
2011 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2012
2013 /* No argument - default to IRQ. */
2014 if (argument == NULL_TREE)
2015 return ARM_FT_ISR;
2016
2017 /* Get the value of the argument. */
2018 if (TREE_VALUE (argument) == NULL_TREE
2019 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2020 return ARM_FT_UNKNOWN;
2021
2022 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2023
2024 /* Check it against the list of known arguments. */
2025 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2026 if (streq (arg, ptr->arg))
2027 return ptr->return_value;
2028
2029 /* An unrecognized interrupt type. */
2030 return ARM_FT_UNKNOWN;
2031 }
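/* A minimal usage sketch (hypothetical declarations, not part of this
   file): given the table above,
       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
   maps to ARM_FT_ISR,
       void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);
   maps to ARM_FT_FIQ, a bare __attribute__ ((interrupt)) defaults to
   ARM_FT_ISR, and an unrecognized argument string yields ARM_FT_UNKNOWN.  */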
2032
2033 /* Computes the type of the current function. */
2034
2035 static unsigned long
2036 arm_compute_func_type (void)
2037 {
2038 unsigned long type = ARM_FT_UNKNOWN;
2039 tree a;
2040 tree attr;
2041
2042 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2043
2044 /* Decide if the current function is volatile. Such functions
2045 never return, and many memory cycles can be saved by not storing
2046 register values that will never be needed again. This optimization
2047 was added to speed up context switching in a kernel application. */
2048 if (optimize > 0
2049 && (TREE_NOTHROW (current_function_decl)
2050 || !(flag_unwind_tables
2051 || (flag_exceptions && arm_except_unwind_info () != UI_SJLJ)))
2052 && TREE_THIS_VOLATILE (current_function_decl))
2053 type |= ARM_FT_VOLATILE;
2054
2055 if (cfun->static_chain_decl != NULL)
2056 type |= ARM_FT_NESTED;
2057
2058 attr = DECL_ATTRIBUTES (current_function_decl);
2059
2060 a = lookup_attribute ("naked", attr);
2061 if (a != NULL_TREE)
2062 type |= ARM_FT_NAKED;
2063
2064 a = lookup_attribute ("isr", attr);
2065 if (a == NULL_TREE)
2066 a = lookup_attribute ("interrupt", attr);
2067
2068 if (a == NULL_TREE)
2069 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2070 else
2071 type |= arm_isr_value (TREE_VALUE (a));
2072
2073 return type;
2074 }
2075
2076 /* Returns the type of the current function. */
2077
2078 unsigned long
2079 arm_current_func_type (void)
2080 {
2081 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2082 cfun->machine->func_type = arm_compute_func_type ();
2083
2084 return cfun->machine->func_type;
2085 }
2086
2087 bool
2088 arm_allocate_stack_slots_for_args (void)
2089 {
2090 /* Naked functions should not allocate stack slots for arguments. */
2091 return !IS_NAKED (arm_current_func_type ());
2092 }
2093
2094 \f
2095 /* Output assembler code for a block containing the constant parts
2096 of a trampoline, leaving space for the variable parts.
2097
2098 On the ARM, (if r8 is the static chain regnum, and remembering that
2099 referencing pc adds an offset of 8) the trampoline looks like:
2100 ldr r8, [pc, #0]
2101 ldr pc, [pc]
2102 .word static chain value
2103 .word function's address
2104 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2105
2106 static void
2107 arm_asm_trampoline_template (FILE *f)
2108 {
2109 if (TARGET_ARM)
2110 {
2111 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2112 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2113 }
2114 else if (TARGET_THUMB2)
2115 {
2116 /* The Thumb-2 trampoline is similar to the arm implementation.
2117 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2118 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2119 STATIC_CHAIN_REGNUM, PC_REGNUM);
2120 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2121 }
2122 else
2123 {
2124 ASM_OUTPUT_ALIGN (f, 2);
2125 fprintf (f, "\t.code\t16\n");
2126 fprintf (f, ".Ltrampoline_start:\n");
2127 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2128 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2129 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2130 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2131 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2132 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2133 }
2134 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2135 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2136 }
2137
2138 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2139
2140 static void
2141 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2142 {
2143 rtx fnaddr, mem, a_tramp;
2144
2145 emit_block_move (m_tramp, assemble_trampoline_template (),
2146 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2147
2148 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2149 emit_move_insn (mem, chain_value);
2150
2151 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2152 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2153 emit_move_insn (mem, fnaddr);
2154
2155 a_tramp = XEXP (m_tramp, 0);
2156 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2157 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2158 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2159 }
2160
2161 /* Thumb trampolines should be entered in thumb mode, so set
2162 the bottom bit of the address. */
2163
2164 static rtx
2165 arm_trampoline_adjust_address (rtx addr)
2166 {
2167 if (TARGET_THUMB)
2168 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2169 NULL, 0, OPTAB_LIB_WIDEN);
2170 return addr;
2171 }
2172 \f
2173 /* Return 1 if it is possible to return using a single instruction.
2174 If SIBLING is non-null, this is a test for a return before a sibling
2175 call. SIBLING is the call insn, so we can examine its register usage. */
2176
2177 int
2178 use_return_insn (int iscond, rtx sibling)
2179 {
2180 int regno;
2181 unsigned int func_type;
2182 unsigned long saved_int_regs;
2183 unsigned HOST_WIDE_INT stack_adjust;
2184 arm_stack_offsets *offsets;
2185
2186 /* Never use a return instruction before reload has run. */
2187 if (!reload_completed)
2188 return 0;
2189
2190 func_type = arm_current_func_type ();
2191
2192 /* Naked, volatile and stack alignment functions need special
2193 consideration. */
2194 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2195 return 0;
2196
2197 /* So do interrupt functions that use the frame pointer and Thumb
2198 interrupt functions. */
2199 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2200 return 0;
2201
2202 offsets = arm_get_frame_offsets ();
2203 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2204
2205 /* As do variadic functions. */
2206 if (crtl->args.pretend_args_size
2207 || cfun->machine->uses_anonymous_args
2208 /* Or if the function calls __builtin_eh_return () */
2209 || crtl->calls_eh_return
2210 /* Or if the function calls alloca */
2211 || cfun->calls_alloca
2212 /* Or if there is a stack adjustment. However, if the stack pointer
2213 is saved on the stack, we can use a pre-incrementing stack load. */
2214 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2215 && stack_adjust == 4)))
2216 return 0;
2217
2218 saved_int_regs = offsets->saved_regs_mask;
2219
2220 /* Unfortunately, the insn
2221
2222 ldmib sp, {..., sp, ...}
2223
2224 triggers a bug on most SA-110 based devices, such that the stack
2225 pointer won't be correctly restored if the instruction takes a
2226 page fault. We work around this problem by popping r3 along with
2227 the other registers, since that is never slower than executing
2228 another instruction.
2229
2230 We test for !arm_arch5 here, because code for any architecture
2231 less than this could potentially be run on one of the buggy
2232 chips. */
2233 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2234 {
2235 /* Validate that r3 is a call-clobbered register (always true in
2236 the default abi) ... */
2237 if (!call_used_regs[3])
2238 return 0;
2239
2240 /* ... that it isn't being used for a return value ... */
2241 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2242 return 0;
2243
2244 /* ... or for a tail-call argument ... */
2245 if (sibling)
2246 {
2247 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2248
2249 if (find_regno_fusage (sibling, USE, 3))
2250 return 0;
2251 }
2252
2253 /* ... and that there are no call-saved registers in r0-r2
2254 (always true in the default ABI). */
2255 if (saved_int_regs & 0x7)
2256 return 0;
2257 }
2258
2259 /* Can't be done if interworking with Thumb, and any registers have been
2260 stacked. */
2261 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2262 return 0;
2263
2264 /* On StrongARM, conditional returns are expensive if they aren't
2265 taken and multiple registers have been stacked. */
2266 if (iscond && arm_tune_strongarm)
2267 {
2268 /* Conditional return when just the LR is stored is a simple
2269 conditional-load instruction, that's not expensive. */
2270 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2271 return 0;
2272
2273 if (flag_pic
2274 && arm_pic_register != INVALID_REGNUM
2275 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2276 return 0;
2277 }
2278
2279 /* If there are saved registers but the LR isn't saved, then we need
2280 two instructions for the return. */
2281 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2282 return 0;
2283
2284 /* Can't be done if any of the FPA regs are pushed,
2285 since this also requires an insn. */
2286 if (TARGET_HARD_FLOAT && TARGET_FPA)
2287 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2288 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2289 return 0;
2290
2291 /* Likewise VFP regs. */
2292 if (TARGET_HARD_FLOAT && TARGET_VFP)
2293 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2294 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2295 return 0;
2296
2297 if (TARGET_REALLY_IWMMXT)
2298 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2299 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2300 return 0;
2301
2302 return 1;
2303 }
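/* For illustration (a hypothetical function, not from this file): a simple
   leaf such as
       int add (int a, int b) { return a + b; }
   saves no registers and needs no stack adjustment, so use_return_insn
   normally returns nonzero and the epilogue collapses to a single return
   instruction ("bx lr", or "mov pc, lr" on older cores).  */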
2304
2305 /* Return TRUE if int I is a valid immediate ARM constant. */
2306
2307 int
2308 const_ok_for_arm (HOST_WIDE_INT i)
2309 {
2310 int lowbit;
2311
2312 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2313 be all zero, or all one. */
2314 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2315 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2316 != ((~(unsigned HOST_WIDE_INT) 0)
2317 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2318 return FALSE;
2319
2320 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2321
2322 /* Fast return for 0 and small values. We must do this for zero, since
2323 the code below can't handle that one case. */
2324 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2325 return TRUE;
2326
2327 /* Get the number of trailing zeros. */
2328 lowbit = ffs((int) i) - 1;
2329
2330 /* Only even shifts are allowed in ARM mode so round down to the
2331 nearest even number. */
2332 if (TARGET_ARM)
2333 lowbit &= ~1;
2334
2335 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2336 return TRUE;
2337
2338 if (TARGET_ARM)
2339 {
2340 /* Allow rotated constants in ARM mode. */
2341 if (lowbit <= 4
2342 && ((i & ~0xc000003f) == 0
2343 || (i & ~0xf000000f) == 0
2344 || (i & ~0xfc000003) == 0))
2345 return TRUE;
2346 }
2347 else
2348 {
2349 HOST_WIDE_INT v;
2350
2351 /* Allow repeated pattern. */
2352 v = i & 0xff;
2353 v |= v << 16;
2354 if (i == v || i == (v | (v << 8)))
2355 return TRUE;
2356 }
2357
2358 return FALSE;
2359 }
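/* Worked examples (illustrative only): in ARM mode const_ok_for_arm
   accepts
       0x000000ff   (8-bit value, no rotation)
       0xff000000   (0xff rotated right by 8)
       0xf000000f   (0xff rotated right by 4, wrapping round the word)
   but rejects 0x00000102, which would need an odd rotation.  In Thumb-2
   mode the replicated patterns 0x00ff00ff and 0xffffffff are also
   accepted by the repeated-pattern test above.  */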
2360
2361 /* Return true if I is a valid constant for the operation CODE. */
2362 static int
2363 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2364 {
2365 if (const_ok_for_arm (i))
2366 return 1;
2367
2368 switch (code)
2369 {
2370 case PLUS:
2371 case COMPARE:
2372 case EQ:
2373 case NE:
2374 case GT:
2375 case LE:
2376 case LT:
2377 case GE:
2378 case GEU:
2379 case LTU:
2380 case GTU:
2381 case LEU:
2382 case UNORDERED:
2383 case ORDERED:
2384 case UNEQ:
2385 case UNGE:
2386 case UNLT:
2387 case UNGT:
2388 case UNLE:
2389 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2390
2391 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2392 case XOR:
2393 return 0;
2394
2395 case IOR:
2396 if (TARGET_THUMB2)
2397 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2398 return 0;
2399
2400 case AND:
2401 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2402
2403 default:
2404 gcc_unreachable ();
2405 }
2406 }
2407
2408 /* Emit a sequence of insns to handle a large constant.
2409 CODE is the code of the operation required, it can be any of SET, PLUS,
2410 IOR, AND, XOR, MINUS;
2411 MODE is the mode in which the operation is being performed;
2412 VAL is the integer to operate on;
2413 SOURCE is the other operand (a register, or a null-pointer for SET);
2414 SUBTARGETS means it is safe to create scratch registers if that will
2415 either produce a simpler sequence, or we will want to cse the values.
2416 Return value is the number of insns emitted. */
2417
2418 /* ??? Tweak this for thumb2. */
2419 int
2420 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2421 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2422 {
2423 rtx cond;
2424
2425 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2426 cond = COND_EXEC_TEST (PATTERN (insn));
2427 else
2428 cond = NULL_RTX;
2429
2430 if (subtargets || code == SET
2431 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2432 && REGNO (target) != REGNO (source)))
2433 {
2434 /* After arm_reorg has been called, we can't fix up expensive
2435 constants by pushing them into memory so we must synthesize
2436 them in-line, regardless of the cost. This is only likely to
2437 be more costly on chips that have load delay slots and we are
2438 compiling without running the scheduler (so no splitting
2439 occurred before the final instruction emission).
2440
2441 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2442 */
2443 if (!after_arm_reorg
2444 && !cond
2445 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2446 1, 0)
2447 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2448 + (code != SET))))
2449 {
2450 if (code == SET)
2451 {
2452 /* Currently SET is the only monadic value for CODE, all
2453 the rest are dyadic. */
2454 if (TARGET_USE_MOVT)
2455 arm_emit_movpair (target, GEN_INT (val));
2456 else
2457 emit_set_insn (target, GEN_INT (val));
2458
2459 return 1;
2460 }
2461 else
2462 {
2463 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2464
2465 if (TARGET_USE_MOVT)
2466 arm_emit_movpair (temp, GEN_INT (val));
2467 else
2468 emit_set_insn (temp, GEN_INT (val));
2469
2470 /* For MINUS, the value is subtracted from, since we never
2471 have subtraction of a constant. */
2472 if (code == MINUS)
2473 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2474 else
2475 emit_set_insn (target,
2476 gen_rtx_fmt_ee (code, mode, source, temp));
2477 return 2;
2478 }
2479 }
2480 }
2481
2482 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2483 1);
2484 }
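/* A hedged example of the fast path above: on a core with movw/movt
   (TARGET_USE_MOVT, e.g. ARMv7), a SET of the constant 0x12345678 can be
   emitted by arm_emit_movpair as the two-instruction sequence
       movw    r0, #0x5678
       movt    r0, #0x1234
   instead of being synthesized from rotated 8-bit immediates or loaded
   from a literal pool.  */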
2485
2486 /* Return the number of instructions required to synthesize the given
2487 constant, if we start emitting them from bit-position I. */
2488 static int
2489 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2490 {
2491 HOST_WIDE_INT temp1;
2492 int step_size = TARGET_ARM ? 2 : 1;
2493 int num_insns = 0;
2494
2495 gcc_assert (TARGET_ARM || i == 0);
2496
2497 do
2498 {
2499 int end;
2500
2501 if (i <= 0)
2502 i += 32;
2503 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2504 {
2505 end = i - 8;
2506 if (end < 0)
2507 end += 32;
2508 temp1 = remainder & ((0x0ff << end)
2509 | ((i < end) ? (0xff >> (32 - end)) : 0));
2510 remainder &= ~temp1;
2511 num_insns++;
2512 i -= 8 - step_size;
2513 }
2514 i -= step_size;
2515 } while (remainder);
2516 return num_insns;
2517 }
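/* Illustrative example: for the constant 0x12345678 in ARM mode the scan
   above peels off one 8-bit chunk (at an even bit position) per
   iteration, so four iterations are needed and the function returns 4,
   matching a four-instruction synthesis of that constant.  */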
2518
2519 static int
2520 find_best_start (unsigned HOST_WIDE_INT remainder)
2521 {
2522 int best_consecutive_zeros = 0;
2523 int i;
2524 int best_start = 0;
2525
2526 /* If we aren't targeting ARM, the best place to start is always at
2527 the bottom. */
2528 if (! TARGET_ARM)
2529 return 0;
2530
2531 for (i = 0; i < 32; i += 2)
2532 {
2533 int consecutive_zeros = 0;
2534
2535 if (!(remainder & (3 << i)))
2536 {
2537 while ((i < 32) && !(remainder & (3 << i)))
2538 {
2539 consecutive_zeros += 2;
2540 i += 2;
2541 }
2542 if (consecutive_zeros > best_consecutive_zeros)
2543 {
2544 best_consecutive_zeros = consecutive_zeros;
2545 best_start = i - consecutive_zeros;
2546 }
2547 i -= 2;
2548 }
2549 }
2550
2551 /* So long as it won't require any more insns to do so, it's
2552 desirable to emit a small constant (in bits 0...9) in the last
2553 insn. This way there is more chance that it can be combined with
2554 a later addressing insn to form a pre-indexed load or store
2555 operation. Consider:
2556
2557 *((volatile int *)0xe0000100) = 1;
2558 *((volatile int *)0xe0000110) = 2;
2559
2560 We want this to wind up as:
2561
2562 mov rA, #0xe0000000
2563 mov rB, #1
2564 str rB, [rA, #0x100]
2565 mov rB, #2
2566 str rB, [rA, #0x110]
2567
2568 rather than having to synthesize both large constants from scratch.
2569
2570 Therefore, we calculate how many insns would be required to emit
2571 the constant starting from `best_start', and also starting from
2572 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2573 yield a shorter sequence, we may as well use zero. */
2574 if (best_start != 0
2575 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2576 && (count_insns_for_constant (remainder, 0) <=
2577 count_insns_for_constant (remainder, best_start)))
2578 best_start = 0;
2579
2580 return best_start;
2581 }
2582
2583 /* Emit an instruction with the indicated PATTERN. If COND is
2584 non-NULL, conditionalize the execution of the instruction on COND
2585 being true. */
2586
2587 static void
2588 emit_constant_insn (rtx cond, rtx pattern)
2589 {
2590 if (cond)
2591 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2592 emit_insn (pattern);
2593 }
2594
2595 /* As above, but extra parameter GENERATE which, if clear, suppresses
2596 RTL generation. */
2597 /* ??? This needs more work for thumb2. */
2598
2599 static int
2600 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2601 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2602 int generate)
2603 {
2604 int can_invert = 0;
2605 int can_negate = 0;
2606 int final_invert = 0;
2607 int can_negate_initial = 0;
2608 int i;
2609 int num_bits_set = 0;
2610 int set_sign_bit_copies = 0;
2611 int clear_sign_bit_copies = 0;
2612 int clear_zero_bit_copies = 0;
2613 int set_zero_bit_copies = 0;
2614 int insns = 0;
2615 unsigned HOST_WIDE_INT temp1, temp2;
2616 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2617 int step_size = TARGET_ARM ? 2 : 1;
2618
2619 /* Find out which operations are safe for a given CODE. Also do a quick
2620 check for degenerate cases; these can occur when DImode operations
2621 are split. */
2622 switch (code)
2623 {
2624 case SET:
2625 can_invert = 1;
2626 can_negate = 1;
2627 break;
2628
2629 case PLUS:
2630 can_negate = 1;
2631 can_negate_initial = 1;
2632 break;
2633
2634 case IOR:
2635 if (remainder == 0xffffffff)
2636 {
2637 if (generate)
2638 emit_constant_insn (cond,
2639 gen_rtx_SET (VOIDmode, target,
2640 GEN_INT (ARM_SIGN_EXTEND (val))));
2641 return 1;
2642 }
2643
2644 if (remainder == 0)
2645 {
2646 if (reload_completed && rtx_equal_p (target, source))
2647 return 0;
2648
2649 if (generate)
2650 emit_constant_insn (cond,
2651 gen_rtx_SET (VOIDmode, target, source));
2652 return 1;
2653 }
2654
2655 if (TARGET_THUMB2)
2656 can_invert = 1;
2657 break;
2658
2659 case AND:
2660 if (remainder == 0)
2661 {
2662 if (generate)
2663 emit_constant_insn (cond,
2664 gen_rtx_SET (VOIDmode, target, const0_rtx));
2665 return 1;
2666 }
2667 if (remainder == 0xffffffff)
2668 {
2669 if (reload_completed && rtx_equal_p (target, source))
2670 return 0;
2671 if (generate)
2672 emit_constant_insn (cond,
2673 gen_rtx_SET (VOIDmode, target, source));
2674 return 1;
2675 }
2676 can_invert = 1;
2677 break;
2678
2679 case XOR:
2680 if (remainder == 0)
2681 {
2682 if (reload_completed && rtx_equal_p (target, source))
2683 return 0;
2684 if (generate)
2685 emit_constant_insn (cond,
2686 gen_rtx_SET (VOIDmode, target, source));
2687 return 1;
2688 }
2689
2690 if (remainder == 0xffffffff)
2691 {
2692 if (generate)
2693 emit_constant_insn (cond,
2694 gen_rtx_SET (VOIDmode, target,
2695 gen_rtx_NOT (mode, source)));
2696 return 1;
2697 }
2698 break;
2699
2700 case MINUS:
2701 /* We treat MINUS as (val - source), since (source - val) is always
2702 passed as (source + (-val)). */
2703 if (remainder == 0)
2704 {
2705 if (generate)
2706 emit_constant_insn (cond,
2707 gen_rtx_SET (VOIDmode, target,
2708 gen_rtx_NEG (mode, source)));
2709 return 1;
2710 }
2711 if (const_ok_for_arm (val))
2712 {
2713 if (generate)
2714 emit_constant_insn (cond,
2715 gen_rtx_SET (VOIDmode, target,
2716 gen_rtx_MINUS (mode, GEN_INT (val),
2717 source)));
2718 return 1;
2719 }
2720 can_negate = 1;
2721
2722 break;
2723
2724 default:
2725 gcc_unreachable ();
2726 }
2727
2728 /* If we can do it in one insn get out quickly. */
2729 if (const_ok_for_arm (val)
2730 || (can_negate_initial && const_ok_for_arm (-val))
2731 || (can_invert && const_ok_for_arm (~val)))
2732 {
2733 if (generate)
2734 emit_constant_insn (cond,
2735 gen_rtx_SET (VOIDmode, target,
2736 (source
2737 ? gen_rtx_fmt_ee (code, mode, source,
2738 GEN_INT (val))
2739 : GEN_INT (val))));
2740 return 1;
2741 }
2742
2743 /* Calculate a few attributes that may be useful for specific
2744 optimizations. */
2745 /* Count number of leading zeros. */
2746 for (i = 31; i >= 0; i--)
2747 {
2748 if ((remainder & (1 << i)) == 0)
2749 clear_sign_bit_copies++;
2750 else
2751 break;
2752 }
2753
2754 /* Count number of leading 1's. */
2755 for (i = 31; i >= 0; i--)
2756 {
2757 if ((remainder & (1 << i)) != 0)
2758 set_sign_bit_copies++;
2759 else
2760 break;
2761 }
2762
2763 /* Count the number of trailing zeros. */
2764 for (i = 0; i <= 31; i++)
2765 {
2766 if ((remainder & (1 << i)) == 0)
2767 clear_zero_bit_copies++;
2768 else
2769 break;
2770 }
2771
2772 /* Count number of trailing 1's. */
2773 for (i = 0; i <= 31; i++)
2774 {
2775 if ((remainder & (1 << i)) != 0)
2776 set_zero_bit_copies++;
2777 else
2778 break;
2779 }
2780
2781 switch (code)
2782 {
2783 case SET:
2784 /* See if we can use movw. */
2785 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2786 {
2787 if (generate)
2788 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2789 GEN_INT (val)));
2790 return 1;
2791 }
2792
2793 /* See if we can do this by sign-extending a constant that is known
2794 to be negative. This is a good way of doing it, since the shift
2795 may well merge into a subsequent insn. */
2796 if (set_sign_bit_copies > 1)
2797 {
2798 if (const_ok_for_arm
2799 (temp1 = ARM_SIGN_EXTEND (remainder
2800 << (set_sign_bit_copies - 1))))
2801 {
2802 if (generate)
2803 {
2804 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2805 emit_constant_insn (cond,
2806 gen_rtx_SET (VOIDmode, new_src,
2807 GEN_INT (temp1)));
2808 emit_constant_insn (cond,
2809 gen_ashrsi3 (target, new_src,
2810 GEN_INT (set_sign_bit_copies - 1)));
2811 }
2812 return 2;
2813 }
2814 /* For an inverted constant, we will need to set the low bits,
2815 these will be shifted out of harm's way. */
2816 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2817 if (const_ok_for_arm (~temp1))
2818 {
2819 if (generate)
2820 {
2821 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2822 emit_constant_insn (cond,
2823 gen_rtx_SET (VOIDmode, new_src,
2824 GEN_INT (temp1)));
2825 emit_constant_insn (cond,
2826 gen_ashrsi3 (target, new_src,
2827 GEN_INT (set_sign_bit_copies - 1)));
2828 }
2829 return 2;
2830 }
2831 }
2832
2833 /* See if we can calculate the value as the difference between two
2834 valid immediates. */
2835 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2836 {
2837 int topshift = clear_sign_bit_copies & ~1;
2838
2839 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2840 & (0xff000000 >> topshift));
2841
2842 /* If temp1 is zero, then that means the 9 most significant
2843 bits of remainder were 1 and we've caused it to overflow.
2844 When topshift is 0 we don't need to do anything since we
2845 can borrow from 'bit 32'. */
2846 if (temp1 == 0 && topshift != 0)
2847 temp1 = 0x80000000 >> (topshift - 1);
2848
2849 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2850
2851 if (const_ok_for_arm (temp2))
2852 {
2853 if (generate)
2854 {
2855 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2856 emit_constant_insn (cond,
2857 gen_rtx_SET (VOIDmode, new_src,
2858 GEN_INT (temp1)));
2859 emit_constant_insn (cond,
2860 gen_addsi3 (target, new_src,
2861 GEN_INT (-temp2)));
2862 }
2863
2864 return 2;
2865 }
2866 }
2867
2868 /* See if we can generate this by setting the bottom (or the top)
2869 16 bits, and then shifting these into the other half of the
2870 word. We only look for the simplest cases; to do more would cost
2871 too much. Be careful, however, not to generate this when the
2872 alternative would take fewer insns. */
2873 if (val & 0xffff0000)
2874 {
2875 temp1 = remainder & 0xffff0000;
2876 temp2 = remainder & 0x0000ffff;
2877
2878 /* Overlaps outside this range are best done using other methods. */
2879 for (i = 9; i < 24; i++)
2880 {
2881 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2882 && !const_ok_for_arm (temp2))
2883 {
2884 rtx new_src = (subtargets
2885 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2886 : target);
2887 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2888 source, subtargets, generate);
2889 source = new_src;
2890 if (generate)
2891 emit_constant_insn
2892 (cond,
2893 gen_rtx_SET
2894 (VOIDmode, target,
2895 gen_rtx_IOR (mode,
2896 gen_rtx_ASHIFT (mode, source,
2897 GEN_INT (i)),
2898 source)));
2899 return insns + 1;
2900 }
2901 }
2902
2903 /* Don't duplicate cases already considered. */
2904 for (i = 17; i < 24; i++)
2905 {
2906 if (((temp1 | (temp1 >> i)) == remainder)
2907 && !const_ok_for_arm (temp1))
2908 {
2909 rtx new_src = (subtargets
2910 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2911 : target);
2912 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2913 source, subtargets, generate);
2914 source = new_src;
2915 if (generate)
2916 emit_constant_insn
2917 (cond,
2918 gen_rtx_SET (VOIDmode, target,
2919 gen_rtx_IOR
2920 (mode,
2921 gen_rtx_LSHIFTRT (mode, source,
2922 GEN_INT (i)),
2923 source)));
2924 return insns + 1;
2925 }
2926 }
2927 }
2928 break;
2929
2930 case IOR:
2931 case XOR:
2932 /* If we have IOR or XOR, and the constant can be loaded in a
2933 single instruction, and we can find a temporary to put it in,
2934 then this can be done in two instructions instead of 3-4. */
2935 if (subtargets
2936 /* TARGET can't be NULL if SUBTARGETS is 0 */
2937 || (reload_completed && !reg_mentioned_p (target, source)))
2938 {
2939 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2940 {
2941 if (generate)
2942 {
2943 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2944
2945 emit_constant_insn (cond,
2946 gen_rtx_SET (VOIDmode, sub,
2947 GEN_INT (val)));
2948 emit_constant_insn (cond,
2949 gen_rtx_SET (VOIDmode, target,
2950 gen_rtx_fmt_ee (code, mode,
2951 source, sub)));
2952 }
2953 return 2;
2954 }
2955 }
2956
2957 if (code == XOR)
2958 break;
2959
2960 /* Convert
2961 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2962 and the remainder 0s, e.g. 0xfff00000)
2963 to x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
2964
2965 This can be done in 2 instructions by using shifts with mov or mvn.
2966 e.g. for
2967 x = x | 0xfff00000;
2968 we generate:
2969 mvn r0, r0, asl #12
2970 mvn r0, r0, lsr #12 */
2971 if (set_sign_bit_copies > 8
2972 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2973 {
2974 if (generate)
2975 {
2976 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2977 rtx shift = GEN_INT (set_sign_bit_copies);
2978
2979 emit_constant_insn
2980 (cond,
2981 gen_rtx_SET (VOIDmode, sub,
2982 gen_rtx_NOT (mode,
2983 gen_rtx_ASHIFT (mode,
2984 source,
2985 shift))));
2986 emit_constant_insn
2987 (cond,
2988 gen_rtx_SET (VOIDmode, target,
2989 gen_rtx_NOT (mode,
2990 gen_rtx_LSHIFTRT (mode, sub,
2991 shift))));
2992 }
2993 return 2;
2994 }
2995
2996 /* Convert
2997 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2998 to
2999 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3000
3001 e.g. for r0 = r0 | 0xfff we generate:
3002 mvn r0, r0, lsr #12
3003 mvn r0, r0, asl #12
3004
3005 */
3006 if (set_zero_bit_copies > 8
3007 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3008 {
3009 if (generate)
3010 {
3011 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3012 rtx shift = GEN_INT (set_zero_bit_copies);
3013
3014 emit_constant_insn
3015 (cond,
3016 gen_rtx_SET (VOIDmode, sub,
3017 gen_rtx_NOT (mode,
3018 gen_rtx_LSHIFTRT (mode,
3019 source,
3020 shift))));
3021 emit_constant_insn
3022 (cond,
3023 gen_rtx_SET (VOIDmode, target,
3024 gen_rtx_NOT (mode,
3025 gen_rtx_ASHIFT (mode, sub,
3026 shift))));
3027 }
3028 return 2;
3029 }
3030
3031 /* This will never be reached for Thumb2 because orn is a valid
3032 instruction. This is for Thumb1 and the 32-bit ARM cases.
3033
3034 x = y | constant (such that ~constant is a valid constant)
3035 Transform this to
3036 x = ~(~y & ~constant).
3037 */
3038 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3039 {
3040 if (generate)
3041 {
3042 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3043 emit_constant_insn (cond,
3044 gen_rtx_SET (VOIDmode, sub,
3045 gen_rtx_NOT (mode, source)));
3046 source = sub;
3047 if (subtargets)
3048 sub = gen_reg_rtx (mode);
3049 emit_constant_insn (cond,
3050 gen_rtx_SET (VOIDmode, sub,
3051 gen_rtx_AND (mode, source,
3052 GEN_INT (temp1))));
3053 emit_constant_insn (cond,
3054 gen_rtx_SET (VOIDmode, target,
3055 gen_rtx_NOT (mode, sub)));
3056 }
3057 return 3;
3058 }
3059 break;
3060
3061 case AND:
3062 /* See if two shifts will do 2 or more insn's worth of work. */
3063 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3064 {
3065 HOST_WIDE_INT shift_mask = ((0xffffffff
3066 << (32 - clear_sign_bit_copies))
3067 & 0xffffffff);
3068
3069 if ((remainder | shift_mask) != 0xffffffff)
3070 {
3071 if (generate)
3072 {
3073 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3074 insns = arm_gen_constant (AND, mode, cond,
3075 remainder | shift_mask,
3076 new_src, source, subtargets, 1);
3077 source = new_src;
3078 }
3079 else
3080 {
3081 rtx targ = subtargets ? NULL_RTX : target;
3082 insns = arm_gen_constant (AND, mode, cond,
3083 remainder | shift_mask,
3084 targ, source, subtargets, 0);
3085 }
3086 }
3087
3088 if (generate)
3089 {
3090 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3091 rtx shift = GEN_INT (clear_sign_bit_copies);
3092
3093 emit_insn (gen_ashlsi3 (new_src, source, shift));
3094 emit_insn (gen_lshrsi3 (target, new_src, shift));
3095 }
3096
3097 return insns + 2;
3098 }
3099
3100 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3101 {
3102 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3103
3104 if ((remainder | shift_mask) != 0xffffffff)
3105 {
3106 if (generate)
3107 {
3108 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3109
3110 insns = arm_gen_constant (AND, mode, cond,
3111 remainder | shift_mask,
3112 new_src, source, subtargets, 1);
3113 source = new_src;
3114 }
3115 else
3116 {
3117 rtx targ = subtargets ? NULL_RTX : target;
3118
3119 insns = arm_gen_constant (AND, mode, cond,
3120 remainder | shift_mask,
3121 targ, source, subtargets, 0);
3122 }
3123 }
3124
3125 if (generate)
3126 {
3127 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3128 rtx shift = GEN_INT (clear_zero_bit_copies);
3129
3130 emit_insn (gen_lshrsi3 (new_src, source, shift));
3131 emit_insn (gen_ashlsi3 (target, new_src, shift));
3132 }
3133
3134 return insns + 2;
3135 }
3136
3137 break;
3138
3139 default:
3140 break;
3141 }
3142
3143 for (i = 0; i < 32; i++)
3144 if (remainder & (1 << i))
3145 num_bits_set++;
3146
3147 if ((code == AND)
3148 || (code != IOR && can_invert && num_bits_set > 16))
3149 remainder ^= 0xffffffff;
3150 else if (code == PLUS && num_bits_set > 16)
3151 remainder = (-remainder) & 0xffffffff;
3152
3153 /* For XOR, if more than half the bits are set and there's a sequence
3154 of more than 8 consecutive ones in the pattern then we can XOR by the
3155 inverted constant and then invert the final result; this may save an
3156 instruction and might also lead to the final mvn being merged with
3157 some other operation. */
3158 else if (code == XOR && num_bits_set > 16
3159 && (count_insns_for_constant (remainder ^ 0xffffffff,
3160 find_best_start
3161 (remainder ^ 0xffffffff))
3162 < count_insns_for_constant (remainder,
3163 find_best_start (remainder))))
3164 {
3165 remainder ^= 0xffffffff;
3166 final_invert = 1;
3167 }
3168 else
3169 {
3170 can_invert = 0;
3171 can_negate = 0;
3172 }
3173
3174 /* Now try and find a way of doing the job in either two or three
3175 instructions.
3176 We start by looking for the largest block of zeros that are aligned on
3177 a 2-bit boundary; we then fill up the temps, wrapping around to the
3178 top of the word when we drop off the bottom.
3179 In the worst case this code should produce no more than four insns.
3180 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3181 best place to start. */
3182
3183 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3184 the same. */
3185 {
3186 /* Now start emitting the insns. */
3187 i = find_best_start (remainder);
3188 do
3189 {
3190 int end;
3191
3192 if (i <= 0)
3193 i += 32;
3194 if (remainder & (3 << (i - 2)))
3195 {
3196 end = i - 8;
3197 if (end < 0)
3198 end += 32;
3199 temp1 = remainder & ((0x0ff << end)
3200 | ((i < end) ? (0xff >> (32 - end)) : 0));
3201 remainder &= ~temp1;
3202
3203 if (generate)
3204 {
3205 rtx new_src, temp1_rtx;
3206
3207 if (code == SET || code == MINUS)
3208 {
3209 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3210 if (can_invert && code != MINUS)
3211 temp1 = ~temp1;
3212 }
3213 else
3214 {
3215 if ((final_invert || remainder) && subtargets)
3216 new_src = gen_reg_rtx (mode);
3217 else
3218 new_src = target;
3219 if (can_invert)
3220 temp1 = ~temp1;
3221 else if (can_negate)
3222 temp1 = -temp1;
3223 }
3224
3225 temp1 = trunc_int_for_mode (temp1, mode);
3226 temp1_rtx = GEN_INT (temp1);
3227
3228 if (code == SET)
3229 ;
3230 else if (code == MINUS)
3231 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3232 else
3233 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3234
3235 emit_constant_insn (cond,
3236 gen_rtx_SET (VOIDmode, new_src,
3237 temp1_rtx));
3238 source = new_src;
3239 }
3240
3241 if (code == SET)
3242 {
3243 can_invert = 0;
3244 code = PLUS;
3245 }
3246 else if (code == MINUS)
3247 code = PLUS;
3248
3249 insns++;
3250 i -= 8 - step_size;
3251 }
3252 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3253 shifts. */
3254 i -= step_size;
3255 }
3256 while (remainder);
3257 }
3258
3259 if (final_invert)
3260 {
3261 if (generate)
3262 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3263 gen_rtx_NOT (mode, source)));
3264 insns++;
3265 }
3266
3267 return insns;
3268 }
3269
3270 /* Canonicalize a comparison so that we are more likely to recognize it.
3271 This can be done for a few constant compares, where we can make the
3272 immediate value easier to load. */
3273
3274 enum rtx_code
3275 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3276 {
3277 enum machine_mode mode;
3278 unsigned HOST_WIDE_INT i, maxval;
3279
3280 mode = GET_MODE (*op0);
3281 if (mode == VOIDmode)
3282 mode = GET_MODE (*op1);
3283
3284 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3285
3286 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3287 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3288 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3289 for GTU/LEU in Thumb mode. */
3290 if (mode == DImode)
3291 {
3292 rtx tem;
3293
3294 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3295 available. */
3296 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3297 return code;
3298
3299 if (code == GT || code == LE
3300 || (!TARGET_ARM && (code == GTU || code == LEU)))
3301 {
3302 /* Missing comparison. First try to use an available
3303 comparison. */
3304 if (GET_CODE (*op1) == CONST_INT)
3305 {
3306 i = INTVAL (*op1);
3307 switch (code)
3308 {
3309 case GT:
3310 case LE:
3311 if (i != maxval
3312 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3313 {
3314 *op1 = GEN_INT (i + 1);
3315 return code == GT ? GE : LT;
3316 }
3317 break;
3318 case GTU:
3319 case LEU:
3320 if (i != ~((unsigned HOST_WIDE_INT) 0)
3321 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3322 {
3323 *op1 = GEN_INT (i + 1);
3324 return code == GTU ? GEU : LTU;
3325 }
3326 break;
3327 default:
3328 gcc_unreachable ();
3329 }
3330 }
3331
3332 /* If that did not work, reverse the condition. */
3333 tem = *op0;
3334 *op0 = *op1;
3335 *op1 = tem;
3336 return swap_condition (code);
3337 }
3338
3339 return code;
3340 }
3341
3342 /* Comparisons smaller than DImode. Only adjust comparisons against
3343 an out-of-range constant. */
3344 if (GET_CODE (*op1) != CONST_INT
3345 || const_ok_for_arm (INTVAL (*op1))
3346 || const_ok_for_arm (- INTVAL (*op1)))
3347 return code;
3348
3349 i = INTVAL (*op1);
3350
3351 switch (code)
3352 {
3353 case EQ:
3354 case NE:
3355 return code;
3356
3357 case GT:
3358 case LE:
3359 if (i != maxval
3360 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3361 {
3362 *op1 = GEN_INT (i + 1);
3363 return code == GT ? GE : LT;
3364 }
3365 break;
3366
3367 case GE:
3368 case LT:
3369 if (i != ~maxval
3370 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3371 {
3372 *op1 = GEN_INT (i - 1);
3373 return code == GE ? GT : LE;
3374 }
3375 break;
3376
3377 case GTU:
3378 case LEU:
3379 if (i != ~((unsigned HOST_WIDE_INT) 0)
3380 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3381 {
3382 *op1 = GEN_INT (i + 1);
3383 return code == GTU ? GEU : LTU;
3384 }
3385 break;
3386
3387 case GEU:
3388 case LTU:
3389 if (i != 0
3390 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3391 {
3392 *op1 = GEN_INT (i - 1);
3393 return code == GEU ? GTU : LEU;
3394 }
3395 break;
3396
3397 default:
3398 gcc_unreachable ();
3399 }
3400
3401 return code;
3402 }
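/* Worked example (SImode, illustrative): for a comparison (x > 0xfff) the
   constant 0xfff is not a valid ARM immediate, but 0x1000 is, so the code
   above rewrites it as (x >= 0x1000); the resulting cmp then needs no
   extra constant-loading instruction.  */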
3403
3404
3405 /* Define how to find the value returned by a function. */
3406
3407 static rtx
3408 arm_function_value(const_tree type, const_tree func,
3409 bool outgoing ATTRIBUTE_UNUSED)
3410 {
3411 enum machine_mode mode;
3412 int unsignedp ATTRIBUTE_UNUSED;
3413 rtx r ATTRIBUTE_UNUSED;
3414
3415 mode = TYPE_MODE (type);
3416
3417 if (TARGET_AAPCS_BASED)
3418 return aapcs_allocate_return_reg (mode, type, func);
3419
3420 /* Promote integer types. */
3421 if (INTEGRAL_TYPE_P (type))
3422 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3423
3424 /* Promote small structs returned in a register to full-word size
3425 for big-endian AAPCS. */
3426 if (arm_return_in_msb (type))
3427 {
3428 HOST_WIDE_INT size = int_size_in_bytes (type);
3429 if (size % UNITS_PER_WORD != 0)
3430 {
3431 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3432 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3433 }
3434 }
3435
3436 return LIBCALL_VALUE (mode);
3437 }
3438
3439 static int
3440 libcall_eq (const void *p1, const void *p2)
3441 {
3442 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3443 }
3444
3445 static hashval_t
3446 libcall_hash (const void *p1)
3447 {
3448 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3449 }
3450
3451 static void
3452 add_libcall (htab_t htab, rtx libcall)
3453 {
3454 *htab_find_slot (htab, libcall, INSERT) = libcall;
3455 }
3456
3457 static bool
3458 arm_libcall_uses_aapcs_base (const_rtx libcall)
3459 {
3460 static bool init_done = false;
3461 static htab_t libcall_htab;
3462
3463 if (!init_done)
3464 {
3465 init_done = true;
3466
3467 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3468 NULL);
3469 add_libcall (libcall_htab,
3470 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3471 add_libcall (libcall_htab,
3472 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3473 add_libcall (libcall_htab,
3474 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3475 add_libcall (libcall_htab,
3476 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3477
3478 add_libcall (libcall_htab,
3479 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3480 add_libcall (libcall_htab,
3481 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3482 add_libcall (libcall_htab,
3483 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3484 add_libcall (libcall_htab,
3485 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3486
3487 add_libcall (libcall_htab,
3488 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3489 add_libcall (libcall_htab,
3490 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3491 add_libcall (libcall_htab,
3492 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3493 add_libcall (libcall_htab,
3494 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3495 add_libcall (libcall_htab,
3496 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3497 add_libcall (libcall_htab,
3498 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3499 }
3500
3501 return libcall && htab_find (libcall_htab, libcall) != NULL;
3502 }
3503
3504 rtx
3505 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3506 {
3507 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3508 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3509 {
3510 /* The following libcalls return their result in integer registers,
3511 even though they return a floating point value. */
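/* As an informal illustration (not an exhaustive statement of the ABI):
   with -mfloat-abi=hard the SImode -> DFmode conversion helper registered
   in arm_libcall_uses_aapcs_base above still hands its DFmode result back
   in r0/r1 rather than in d0, so it is fetched from the core registers. */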
3512 if (arm_libcall_uses_aapcs_base (libcall))
3513 return gen_rtx_REG (mode, ARG_REGISTER(1));
3514
3515 }
3516
3517 return LIBCALL_VALUE (mode);
3518 }
3519
3520 /* Determine the amount of memory needed to store the possible return
3521 registers of an untyped call. */
3522 int
3523 arm_apply_result_size (void)
3524 {
3525 int size = 16;
3526
3527 if (TARGET_32BIT)
3528 {
3529 if (TARGET_HARD_FLOAT_ABI)
3530 {
3531 if (TARGET_VFP)
3532 size += 32;
3533 if (TARGET_FPA)
3534 size += 12;
3535 if (TARGET_MAVERICK)
3536 size += 8;
3537 }
3538 if (TARGET_IWMMXT_ABI)
3539 size += 8;
3540 }
3541
3542 return size;
3543 }
3544
3545 /* Decide whether TYPE should be returned in memory (true)
3546 or in a register (false). FNTYPE is the type of the function making
3547 the call. */
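/* A few illustrative cases (informal examples chosen here, not quotations
   from the ABI documents): under the AAPCS a struct holding a single int
   fits in one word and is returned in r0; a struct of two ints is larger
   than a word and, having no co-processor candidate, is returned in
   memory; a struct of two floats is a homogeneous aggregate and, under
   the VFP variant of the AAPCS, is returned in VFP registers rather than
   in memory. */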
3548 static bool
3549 arm_return_in_memory (const_tree type, const_tree fntype)
3550 {
3551 HOST_WIDE_INT size;
3552
3553 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3554
3555 if (TARGET_AAPCS_BASED)
3556 {
3557 /* Simple, non-aggregate types (i.e. not including vectors and
3558 complex) are always returned in a register (or registers).
3559 We don't care about which register here, so we can short-cut
3560 some of the detail. */
3561 if (!AGGREGATE_TYPE_P (type)
3562 && TREE_CODE (type) != VECTOR_TYPE
3563 && TREE_CODE (type) != COMPLEX_TYPE)
3564 return false;
3565
3566 /* Any return value that is no larger than one word can be
3567 returned in r0. */
3568 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3569 return false;
3570
3571 /* Check any available co-processors to see if they accept the
3572 type as a register candidate (VFP, for example, can return
3573 some aggregates in consecutive registers). These aren't
3574 available if the call is variadic. */
3575 if (aapcs_select_return_coproc (type, fntype) >= 0)
3576 return false;
3577
3578 /* Vector values should be returned using ARM registers, not
3579 memory (unless they're over 16 bytes, which will break since
3580 we only have four call-clobbered registers to play with). */
3581 if (TREE_CODE (type) == VECTOR_TYPE)
3582 return (size < 0 || size > (4 * UNITS_PER_WORD));
3583
3584 /* The rest go in memory. */
3585 return true;
3586 }
3587
3588 if (TREE_CODE (type) == VECTOR_TYPE)
3589 return (size < 0 || size > (4 * UNITS_PER_WORD));
3590
3591 if (!AGGREGATE_TYPE_P (type)
3592     && TREE_CODE (type) != VECTOR_TYPE)
3593 /* All simple types are returned in registers. */
3594 return false;
3595
3596 if (arm_abi != ARM_ABI_APCS)
3597 {
3598 /* ATPCS and later return aggregate types in memory only if they are
3599 larger than a word (or are variable size). */
3600 return (size < 0 || size > UNITS_PER_WORD);
3601 }
3602
3603 /* For the arm-wince targets we choose to be compatible with Microsoft's
3604 ARM and Thumb compilers, which always return aggregates in memory. */
3605 #ifndef ARM_WINCE
3606 /* All structures/unions bigger than one word are returned in memory.
3607 Also catch the case where int_size_in_bytes returns -1. In this case
3608 the aggregate is either huge or of variable size, and in either case
3609 we will want to return it via memory and not in a register. */
3610 if (size < 0 || size > UNITS_PER_WORD)
3611 return true;
3612
3613 if (TREE_CODE (type) == RECORD_TYPE)
3614 {
3615 tree field;
3616
3617 /* For a struct the APCS says that we only return in a register
3618 if the type is 'integer like' and every addressable element
3619 has an offset of zero. For practical purposes this means
3620 that the structure can have at most one non bit-field element
3621 and that this element must be the first one in the structure. */
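/* Informal examples (assumed types for exposition, not taken from the
   APCS text): struct { short a; unsigned b : 8; } has one addressable
   field followed only by a bit-field, so it is returned in a register;
   struct { short a; short b; } has two addressable fields and is
   therefore returned in memory. */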
3622
3623 /* Find the first field, ignoring non FIELD_DECL things which will
3624 have been created by C++. */
3625 for (field = TYPE_FIELDS (type);
3626 field && TREE_CODE (field) != FIELD_DECL;
3627 field = DECL_CHAIN (field))
3628 continue;
3629
3630 if (field == NULL)
3631 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3632
3633 /* Check that the first field is valid for returning in a register. */
3634
3635 /* ... Floats are not allowed. */
3636 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3637 return true;
3638
3639 /* ... Aggregates that are not themselves valid for returning in
3640 a register are not allowed. */
3641 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3642 return true;
3643
3644 /* Now check the remaining fields, if any. Only bitfields are allowed,
3645 since they are not addressable. */
3646 for (field = DECL_CHAIN (field);
3647 field;
3648 field = DECL_CHAIN (field))
3649 {
3650 if (TREE_CODE (field) != FIELD_DECL)
3651 continue;
3652
3653 if (!DECL_BIT_FIELD_TYPE (field))
3654 return true;
3655 }
3656
3657 return false;
3658 }
3659
3660 if (TREE_CODE (type) == UNION_TYPE)
3661 {
3662 tree field;
3663
3664 /* Unions can be returned in registers if every element is
3665 integral, or can be returned in an integer register. */
3666 for (field = TYPE_FIELDS (type);
3667 field;
3668 field = DECL_CHAIN (field))
3669 {
3670 if (TREE_CODE (field) != FIELD_DECL)
3671 continue;
3672
3673 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3674 return true;
3675
3676 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3677 return true;
3678 }
3679
3680 return false;
3681 }
3682 #endif /* not ARM_WINCE */
3683
3684 /* Return all other types in memory. */
3685 return true;
3686 }
3687
3688 /* Indicate whether or not words of a double are in big-endian order. */
3689
3690 int
3691 arm_float_words_big_endian (void)
3692 {
3693 if (TARGET_MAVERICK)
3694 return 0;
3695
3696 /* For FPA, float words are always big-endian. For VFP, float words
3697 follow the memory system mode. */
3698
3699 if (TARGET_FPA)
3700 {
3701 return 1;
3702 }
3703
3704 if (TARGET_VFP)
3705 return (TARGET_BIG_END ? 1 : 0);
3706
3707 return 1;
3708 }
3709
3710 const struct pcs_attribute_arg
3711 {
3712 const char *arg;
3713 enum arm_pcs value;
3714 } pcs_attribute_args[] =
3715 {
3716 {"aapcs", ARM_PCS_AAPCS},
3717 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3718 #if 0
3719 /* We could recognize these, but changes would be needed elsewhere
3720 * to implement them. */
3721 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3722 {"atpcs", ARM_PCS_ATPCS},
3723 {"apcs", ARM_PCS_APCS},
3724 #endif
3725 {NULL, ARM_PCS_UNKNOWN}
3726 };
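/* For example (purely illustrative), a prototype such as

       double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   requests the VFP variant of the AAPCS for calls to f, overriding the
   calling convention selected on the command line for the rest of the
   translation unit. */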
3727
3728 static enum arm_pcs
3729 arm_pcs_from_attribute (tree attr)
3730 {
3731 const struct pcs_attribute_arg *ptr;
3732 const char *arg;
3733
3734 /* Get the value of the argument. */
3735 if (TREE_VALUE (attr) == NULL_TREE
3736 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3737 return ARM_PCS_UNKNOWN;
3738
3739 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3740
3741 /* Check it against the list of known arguments. */
3742 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3743 if (streq (arg, ptr->arg))
3744 return ptr->value;
3745
3746 /* An unrecognized PCS variant. */
3747 return ARM_PCS_UNKNOWN;
3748 }
3749
3750 /* Get the PCS variant to use for this call. TYPE is the function's type
3751 specification, DECL is the specific declaration. DECL may be null if
3752 the call could be indirect or if this is a library call. */
3753 static enum arm_pcs
3754 arm_get_pcs_model (const_tree type, const_tree decl)
3755 {
3756 bool user_convention = false;
3757 enum arm_pcs user_pcs = arm_pcs_default;
3758 tree attr;
3759
3760 gcc_assert (type);
3761
3762 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3763 if (attr)
3764 {
3765 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3766 user_convention = true;
3767 }
3768
3769 if (TARGET_AAPCS_BASED)
3770 {
3771 /* Detect varargs functions. These always use the base rules
3772 (no argument is ever a candidate for a co-processor
3773 register). */
3774 bool base_rules = stdarg_p (type);
3775
3776 if (user_convention)
3777 {
3778 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3779 sorry ("Non-AAPCS derived PCS variant");
3780 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3781 error ("Variadic functions must use the base AAPCS variant");
3782 }
3783
3784 if (base_rules)
3785 return ARM_PCS_AAPCS;
3786 else if (user_convention)
3787 return user_pcs;
3788 else if (decl && flag_unit_at_a_time)
3789 {
3790 /* Local functions never leak outside this compilation unit,
3791 so we are free to use whatever conventions are
3792 appropriate. */
3793 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3794 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3795 if (i && i->local)
3796 return ARM_PCS_AAPCS_LOCAL;
3797 }
3798 }
3799 else if (user_convention && user_pcs != arm_pcs_default)
3800 sorry ("PCS variant");
3801
3802 /* For everything else we use the target's default. */
3803 return arm_pcs_default;
3804 }
3805
3806
3807 static void
3808 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
3809 const_tree fntype ATTRIBUTE_UNUSED,
3810 rtx libcall ATTRIBUTE_UNUSED,
3811 const_tree fndecl ATTRIBUTE_UNUSED)
3812 {
3813 /* Record the unallocated VFP registers. */
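/* (Under the VFP variant of the AAPCS the argument registers are s0-s15,
   i.e. NUM_VFP_ARG_REGS is 16, so the free mask below starts out as
   0xffff; noted here purely as an explanatory aside.) */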
3814 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3815 pcum->aapcs_vfp_reg_alloc = 0;
3816 }
3817
3818 /* Walk down the type tree of TYPE counting consecutive base elements.
3819 If *MODEP is VOIDmode, then set it to the first valid floating point
3820 type. If a non-floating point type is found, or if a floating point
3821 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3822 otherwise return the count in the sub-tree. */
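/* Informal examples (chosen for exposition): for
   struct { float a; float b; float c; } this returns 3 with *MODEP set to
   SFmode, making the struct a candidate homogeneous aggregate; for
   struct { float a; double b; } the element modes differ, so the result
   is -1 and the aggregate falls back to the base AAPCS rules. */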
3823 static int
3824 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3825 {
3826 enum machine_mode mode;
3827 HOST_WIDE_INT size;
3828
3829 switch (TREE_CODE (type))
3830 {
3831 case REAL_TYPE:
3832 mode = TYPE_MODE (type);
3833 if (mode != DFmode && mode != SFmode)
3834 return -1;
3835
3836 if (*modep == VOIDmode)
3837 *modep = mode;
3838
3839 if (*modep == mode)
3840 return 1;
3841
3842 break;
3843
3844 case COMPLEX_TYPE:
3845 mode = TYPE_MODE (TREE_TYPE (type));
3846 if (mode != DFmode && mode != SFmode)
3847 return -1;
3848
3849 if (*modep == VOIDmode)
3850 *modep = mode;
3851
3852 if (*modep == mode)
3853 return 2;
3854
3855 break;
3856
3857 case VECTOR_TYPE:
3858 /* Use V2SImode and V4SImode as representatives of all 64-bit
3859 and 128-bit vector types, whether or not those modes are
3860 supported with the present options. */
3861 size = int_size_in_bytes (type);
3862 switch (size)
3863 {
3864 case 8:
3865 mode = V2SImode;
3866 break;
3867 case 16:
3868 mode = V4SImode;
3869 break;
3870 default:
3871 return -1;
3872 }
3873
3874 if (*modep == VOIDmode)
3875 *modep = mode;
3876
3877 /* Vector modes are considered to be opaque: two vectors are
3878 equivalent for the purposes of being homogeneous aggregates
3879 if they are the same size. */
3880 if (*modep == mode)
3881 return 1;
3882
3883 break;
3884
3885 case ARRAY_TYPE:
3886 {
3887 int count;
3888 tree index = TYPE_DOMAIN (type);
3889
3890 /* Can't handle incomplete types. */
3891 if (!COMPLETE_TYPE_P(type))
3892 return -1;
3893
3894 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3895 if (count == -1
3896 || !index
3897 || !TYPE_MAX_VALUE (index)
3898 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3899 || !TYPE_MIN_VALUE (index)
3900 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3901 || count < 0)
3902 return -1;
3903
3904 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3905 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3906
3907 /* There must be no padding. */
3908 if (!host_integerp (TYPE_SIZE (type), 1)
3909 || (tree_low_cst (TYPE_SIZE (type), 1)
3910 != count * GET_MODE_BITSIZE (*modep)))
3911 return -1;
3912
3913 return count;
3914 }
3915
3916 case RECORD_TYPE:
3917 {
3918 int count = 0;
3919 int sub_count;
3920 tree field;
3921
3922 /* Can't handle incomplete types. */
3923 if (!COMPLETE_TYPE_P(type))
3924 return -1;
3925
3926 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3927 {
3928 if (TREE_CODE (field) != FIELD_DECL)
3929 continue;
3930
3931 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3932 if (sub_count < 0)
3933 return -1;
3934 count += sub_count;
3935 }
3936
3937 /* There must be no padding. */
3938 if (!host_integerp (TYPE_SIZE (type), 1)
3939 || (tree_low_cst (TYPE_SIZE (type), 1)
3940 != count * GET_MODE_BITSIZE (*modep)))
3941 return -1;
3942
3943 return count;
3944 }
3945
3946 case UNION_TYPE:
3947 case QUAL_UNION_TYPE:
3948 {
3949 /* These aren't very interesting except in a degenerate case. */
3950 int count = 0;
3951 int sub_count;
3952 tree field;
3953
3954 /* Can't handle incomplete types. */
3955 if (!COMPLETE_TYPE_P(type))
3956 return -1;
3957
3958 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3959 {
3960 if (TREE_CODE (field) != FIELD_DECL)
3961 continue;
3962
3963 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3964 if (sub_count < 0)
3965 return -1;
3966 count = count > sub_count ? count : sub_count;
3967 }
3968
3969 /* There must be no padding. */
3970 if (!host_integerp (TYPE_SIZE (type), 1)
3971 || (tree_low_cst (TYPE_SIZE (type), 1)
3972 != count * GET_MODE_BITSIZE (*modep)))
3973 return -1;
3974
3975 return count;
3976 }
3977
3978 default:
3979 break;
3980 }
3981
3982 return -1;
3983 }
3984
3985 /* Return true if PCS_VARIANT should use VFP registers. */
3986 static bool
3987 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3988 {
3989 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3990 {
3991 static bool seen_thumb1_vfp = false;
3992
3993 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3994 {
3995 sorry ("Thumb-1 hard-float VFP ABI");
3996 /* sorry() is not immediately fatal, so only display this once. */
3997 seen_thumb1_vfp = true;
3998 }
3999
4000 return true;
4001 }
4002
4003 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4004 return false;
4005
4006 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4007         && (TARGET_VFP_DOUBLE || !is_double));
4008 }
4009
4010 static bool
4011 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4012 enum machine_mode mode, const_tree type,
4013 enum machine_mode *base_mode, int *count)
4014 {
4015 enum machine_mode new_mode = VOIDmode;
4016
4017 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4018 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4019 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4020 {
4021 *count = 1;
4022 new_mode = mode;
4023 }
4024 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4025 {
4026 *count = 2;
4027 new_mode = (mode == DCmode ? DFmode : SFmode);
4028 }
4029 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4030 {
4031 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4032
4033 if (ag_count > 0 && ag_count <= 4)
4034 *count = ag_count;
4035 else
4036 return false;
4037 }
4038 else
4039 return false;
4040
4041
4042 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4043 return false;
4044
4045 *base_mode = new_mode;
4046 return true;
4047 }
4048
4049 static bool
4050 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4051 enum machine_mode mode, const_tree type)
4052 {
4053 int count ATTRIBUTE_UNUSED;
4054 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4055
4056 if (!use_vfp_abi (pcs_variant, false))
4057 return false;
4058 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4059 &ag_mode, &count);
4060 }
4061
4062 static bool
4063 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4064 const_tree type)
4065 {
4066 if (!use_vfp_abi (pcum->pcs_variant, false))
4067 return false;
4068
4069 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4070 &pcum->aapcs_vfp_rmode,
4071 &pcum->aapcs_vfp_rcount);
4072 }
4073
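/* Try to allocate VFP registers for the argument described by PCUM.  As
   an informal illustration (typical values assumed, not a restatement of
   the ABI text): a DFmode argument occupies two consecutive
   single-precision slots starting at an even register number, while a
   homogeneous aggregate of three floats needs a run of three free
   single-precision registers; if no suitable run exists the argument is
   left for the caller to place on the stack. */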
4074 static bool
4075 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4076 const_tree type ATTRIBUTE_UNUSED)
4077 {
4078 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4079 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4080 int regno;
4081
4082 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4083 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4084 {
4085 pcum->aapcs_vfp_reg_alloc = mask << regno;
4086 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4087 {
4088 int i;
4089 int rcount = pcum->aapcs_vfp_rcount;
4090 int rshift = shift;
4091 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4092 rtx par;
4093 if (!TARGET_NEON)
4094 {
4095 /* Avoid using unsupported vector modes. */
4096 if (rmode == V2SImode)
4097 rmode = DImode;
4098 else if (rmode == V4SImode)
4099 {
4100 rmode = DImode;
4101 rcount *= 2;
4102 rshift /= 2;
4103 }
4104 }
4105 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4106 for (i = 0; i < rcount; i++)
4107 {
4108 rtx tmp = gen_rtx_REG (rmode,
4109 FIRST_VFP_REGNUM + regno + i * rshift);
4110 tmp = gen_rtx_EXPR_LIST
4111 (VOIDmode, tmp,
4112 GEN_INT (i * GET_MODE_SIZE (rmode)));
4113 XVECEXP (par, 0, i) = tmp;
4114 }
4115
4116 pcum->aapcs_reg = par;
4117 }
4118 else
4119 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4120 return true;
4121 }
4122 return false;
4123 }
4124
4125 static rtx
4126 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4127                                enum machine_mode mode,
4128                                const_tree type)
4129 {
4130 if (!use_vfp_abi (pcs_variant, false))
4131 return NULL_RTX;
4132
4133 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4134 {
4135 int count;
4136 enum machine_mode ag_mode;
4137 int i;
4138 rtx par;
4139 int shift;
4140
4141 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4142 &ag_mode, &count);
4143
4144 if (!TARGET_NEON)
4145 {
4146 if (ag_mode == V2SImode)
4147 ag_mode = DImode;
4148 else if (ag_mode == V4SImode)
4149 {
4150 ag_mode = DImode;
4151 count *= 2;
4152 }
4153 }
4154 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4155 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4156 for (i = 0; i < count; i++)
4157 {
4158 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4159 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4160 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4161 XVECEXP (par, 0, i) = tmp;
4162 }
4163
4164 return par;
4165 }
4166
4167 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4168 }
4169
4170 static void
4171 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4172 enum machine_mode mode ATTRIBUTE_UNUSED,
4173 const_tree type ATTRIBUTE_UNUSED)
4174 {
4175 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4176 pcum->aapcs_vfp_reg_alloc = 0;
4177 return;
4178 }
4179
4180 #define AAPCS_CP(X) \
4181 { \
4182 aapcs_ ## X ## _cum_init, \
4183 aapcs_ ## X ## _is_call_candidate, \
4184 aapcs_ ## X ## _allocate, \
4185 aapcs_ ## X ## _is_return_candidate, \
4186 aapcs_ ## X ## _allocate_return_reg, \
4187 aapcs_ ## X ## _advance \
4188 }
4189
4190 /* Table of co-processors that can be used to pass arguments in
4191 registers. Ideally no argument should be a candidate for more than
4192 one co-processor table entry, but the table is processed in order
4193 and stops after the first match. If that entry then fails to put
4194 the argument into a co-processor register, the argument will go on
4195 the stack. */
4196 static struct
4197 {
4198 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4199 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4200
4201 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4202 BLKmode) is a candidate for this co-processor's registers; this
4203 function should ignore any position-dependent state in
4204 CUMULATIVE_ARGS and only use call-type dependent information. */
4205 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4206
4207 /* Return true if the argument does get a co-processor register; it
4208 should set aapcs_reg to an RTX of the register allocated as is
4209 required for a return from FUNCTION_ARG. */
4210 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4211
4212 /* Return true if a result of mode MODE (or type TYPE if MODE is
4213 BLKmode) can be returned in this co-processor's registers. */
4214 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4215
4216 /* Allocate and return an RTX element to hold the return type of a
4217 call, this routine must not fail and will only be called if
4218 is_return_candidate returned true with the same parameters. */
4219 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4220
4221 /* Finish processing this argument and prepare to start processing
4222 the next one. */
4223 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4224 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4225 {
4226 AAPCS_CP(vfp)
4227 };
4228
4229 #undef AAPCS_CP
4230
4231 static int
4232 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4233 const_tree type)
4234 {
4235 int i;
4236
4237 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4238 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4239 return i;
4240
4241 return -1;
4242 }
4243
4244 static int
4245 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4246 {
4247 /* We aren't passed a decl, so we can't check that a call is local.
4248 However, it isn't clear that that would be a win anyway, since it
4249 might limit some tail-calling opportunities. */
4250 enum arm_pcs pcs_variant;
4251
4252 if (fntype)
4253 {
4254 const_tree fndecl = NULL_TREE;
4255
4256 if (TREE_CODE (fntype) == FUNCTION_DECL)
4257 {
4258 fndecl = fntype;
4259 fntype = TREE_TYPE (fntype);
4260 }
4261
4262 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4263 }
4264 else
4265 pcs_variant = arm_pcs_default;
4266
4267 if (pcs_variant != ARM_PCS_AAPCS)
4268 {
4269 int i;
4270
4271 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4272 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4273 TYPE_MODE (type),
4274 type))
4275 return i;
4276 }
4277 return -1;
4278 }
4279
4280 static rtx
4281 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4282 const_tree fntype)
4283 {
4284 /* We aren't passed a decl, so we can't check that a call is local.
4285 However, it isn't clear that that would be a win anyway, since it
4286 might limit some tail-calling opportunities. */
4287 enum arm_pcs pcs_variant;
4288 int unsignedp ATTRIBUTE_UNUSED;
4289
4290 if (fntype)
4291 {
4292 const_tree fndecl = NULL_TREE;
4293
4294 if (TREE_CODE (fntype) == FUNCTION_DECL)
4295 {
4296 fndecl = fntype;
4297 fntype = TREE_TYPE (fntype);
4298 }
4299
4300 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4301 }
4302 else
4303 pcs_variant = arm_pcs_default;
4304
4305 /* Promote integer types. */
4306 if (type && INTEGRAL_TYPE_P (type))
4307 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4308
4309 if (pcs_variant != ARM_PCS_AAPCS)
4310 {
4311 int i;
4312
4313 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4314 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4315 type))
4316 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4317 mode, type);
4318 }
4319
4320 /* Promotes small structs returned in a register to full-word size
4321 for big-endian AAPCS. */
4322 if (type && arm_return_in_msb (type))
4323 {
4324 HOST_WIDE_INT size = int_size_in_bytes (type);
4325 if (size % UNITS_PER_WORD != 0)
4326 {
4327 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4328 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4329 }
4330 }
4331
4332 return gen_rtx_REG (mode, R0_REGNUM);
4333 }
4334
4335 rtx
4336 aapcs_libcall_value (enum machine_mode mode)
4337 {
4338 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4339 }
4340
4341 /* Lay out a function argument using the AAPCS rules. The rule
4342 numbers referred to here are those in the AAPCS. */
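/* A worked example (illustrative only, using the base AAPCS rules): for
   void f (int a, double b, int c), a is allocated to r0 (rule C4); b
   needs doubleword alignment, so the NCRN is rounded up from 1 to 2
   (rule C3) and b takes r2-r3; that leaves no core registers for c,
   which therefore goes on the stack (rules C6-C8). */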
4343 static void
4344 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4345 const_tree type, bool named)
4346 {
4347 int nregs, nregs2;
4348 int ncrn;
4349
4350 /* We only need to do this once per argument. */
4351 if (pcum->aapcs_arg_processed)
4352 return;
4353
4354 pcum->aapcs_arg_processed = true;
4355
4356 /* Special case: if named is false then we are handling an incoming
4357 anonymous argument which is on the stack. */
4358 if (!named)
4359 return;
4360
4361 /* Is this a potential co-processor register candidate? */
4362 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4363 {
4364 int slot = aapcs_select_call_coproc (pcum, mode, type);
4365 pcum->aapcs_cprc_slot = slot;
4366
4367 /* We don't have to apply any of the rules from part B of the
4368 preparation phase, these are handled elsewhere in the
4369 compiler. */
4370
4371 if (slot >= 0)
4372 {
4373 /* A Co-processor register candidate goes either in its own
4374 class of registers or on the stack. */
4375 if (!pcum->aapcs_cprc_failed[slot])
4376 {
4377 /* C1.cp - Try to allocate the argument to co-processor
4378 registers. */
4379 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4380 return;
4381
4382 /* C2.cp - Put the argument on the stack and note that we
4383 can't assign any more candidates in this slot. We also
4384 need to note that we have allocated stack space, so that
4385 we won't later try to split a non-cprc candidate between
4386 core registers and the stack. */
4387 pcum->aapcs_cprc_failed[slot] = true;
4388 pcum->can_split = false;
4389 }
4390
4391 /* We didn't get a register, so this argument goes on the
4392 stack. */
4393 gcc_assert (pcum->can_split == false);
4394 return;
4395 }
4396 }
4397
4398 /* C3 - For double-word aligned arguments, round the NCRN up to the
4399 next even number. */
4400 ncrn = pcum->aapcs_ncrn;
4401 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4402 ncrn++;
4403
4404 nregs = ARM_NUM_REGS2(mode, type);
4405
4406 /* Sigh, this test should really assert that nregs > 0, but a GCC
4407 extension allows empty structs and then gives them empty size; it
4408 then allows such a structure to be passed by value. For some of
4409 the code below we have to pretend that such an argument has
4410 non-zero size so that we 'locate' it correctly either in
4411 registers or on the stack. */
4412 gcc_assert (nregs >= 0);
4413
4414 nregs2 = nregs ? nregs : 1;
4415
4416 /* C4 - Argument fits entirely in core registers. */
4417 if (ncrn + nregs2 <= NUM_ARG_REGS)
4418 {
4419 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4420 pcum->aapcs_next_ncrn = ncrn + nregs;
4421 return;
4422 }
4423
4424 /* C5 - Some core registers left and there are no arguments already
4425 on the stack: split this argument between the remaining core
4426 registers and the stack. */
4427 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4428 {
4429 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4430 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4431 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4432 return;
4433 }
4434
4435 /* C6 - NCRN is set to 4. */
4436 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4437
4438 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4439 return;
4440 }
4441
4442 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4443 for a call to a function whose data type is FNTYPE.
4444 For a library call, FNTYPE is NULL. */
4445 void
4446 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4447 rtx libname,
4448 tree fndecl ATTRIBUTE_UNUSED)
4449 {
4450 /* Determine the PCS variant to use for this call. */
4451 if (fntype)
4452 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4453 else
4454 pcum->pcs_variant = arm_pcs_default;
4455
4456 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4457 {
4458 if (arm_libcall_uses_aapcs_base (libname))
4459 pcum->pcs_variant = ARM_PCS_AAPCS;
4460
4461 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4462 pcum->aapcs_reg = NULL_RTX;
4463 pcum->aapcs_partial = 0;
4464 pcum->aapcs_arg_processed = false;
4465 pcum->aapcs_cprc_slot = -1;
4466 pcum->can_split = true;
4467
4468 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4469 {
4470 int i;
4471
4472 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4473 {
4474 pcum->aapcs_cprc_failed[i] = false;
4475 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4476 }
4477 }
4478 return;
4479 }
4480
4481 /* Legacy ABIs */
4482
4483 /* On the ARM, the offset starts at 0. */
4484 pcum->nregs = 0;
4485 pcum->iwmmxt_nregs = 0;
4486 pcum->can_split = true;
4487
4488 /* Varargs vectors are treated the same as long long.
4489 named_count avoids having to change the way arm handles 'named' */
4490 pcum->named_count = 0;
4491 pcum->nargs = 0;
4492
4493 if (TARGET_REALLY_IWMMXT && fntype)
4494 {
4495 tree fn_arg;
4496
4497 for (fn_arg = TYPE_ARG_TYPES (fntype);
4498 fn_arg;
4499 fn_arg = TREE_CHAIN (fn_arg))
4500 pcum->named_count += 1;
4501
4502 if (! pcum->named_count)
4503 pcum->named_count = INT_MAX;
4504 }
4505 }
4506
4507
4508 /* Return true if mode/type need doubleword alignment. */
4509 bool
4510 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4511 {
4512 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4513 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4514 }
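/* For instance (illustrative): DImode and DFmode values have 64-bit
   alignment, which exceeds the 32-bit PARM_BOUNDARY, so callers of the
   predicate above place them in an even/odd core register pair or give
   them doubleword alignment on the stack. */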
4515
4516
4517 /* Determine where to put an argument to a function.
4518 Value is zero to push the argument on the stack,
4519 or a hard register in which to store the argument.
4520
4521 MODE is the argument's machine mode.
4522 TYPE is the data type of the argument (as a tree).
4523 This is null for libcalls where that information may
4524 not be available.
4525 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4526 the preceding args and about the function being called.
4527 NAMED is nonzero if this argument is a named parameter
4528 (otherwise it is an extra parameter matching an ellipsis).
4529
4530 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4531 other arguments are passed on the stack. If (NAMED == 0) (which happens
4532 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4533 defined), say it is passed on the stack (function_prologue will
4534 indeed make it pass on the stack if necessary). */
4535
4536 static rtx
4537 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4538 const_tree type, bool named)
4539 {
4540 int nregs;
4541
4542 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4543 a call insn (op3 of a call_value insn). */
4544 if (mode == VOIDmode)
4545 return const0_rtx;
4546
4547 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4548 {
4549 aapcs_layout_arg (pcum, mode, type, named);
4550 return pcum->aapcs_reg;
4551 }
4552
4553 /* Varargs vectors are treated the same as long long.
4554 named_count avoids having to change the way arm handles 'named' */
4555 if (TARGET_IWMMXT_ABI
4556 && arm_vector_mode_supported_p (mode)
4557 && pcum->named_count > pcum->nargs + 1)
4558 {
4559 if (pcum->iwmmxt_nregs <= 9)
4560 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4561 else
4562 {
4563 pcum->can_split = false;
4564 return NULL_RTX;
4565 }
4566 }
4567
4568 /* Put doubleword aligned quantities in even register pairs. */
4569 if (pcum->nregs & 1
4570 && ARM_DOUBLEWORD_ALIGN
4571 && arm_needs_doubleword_align (mode, type))
4572 pcum->nregs++;
4573
4574 /* Only allow splitting an arg between regs and memory if all preceding
4575 args were allocated to regs. For args passed by reference we only count
4576 the reference pointer. */
4577 if (pcum->can_split)
4578 nregs = 1;
4579 else
4580 nregs = ARM_NUM_REGS2 (mode, type);
4581
4582 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4583 return NULL_RTX;
4584
4585 return gen_rtx_REG (mode, pcum->nregs);
4586 }
4587
4588 static int
4589 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4590 tree type, bool named)
4591 {
4592 int nregs = pcum->nregs;
4593
4594 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4595 {
4596 aapcs_layout_arg (pcum, mode, type, named);
4597 return pcum->aapcs_partial;
4598 }
4599
4600 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4601 return 0;
4602
4603 if (NUM_ARG_REGS > nregs
4604 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4605 && pcum->can_split)
4606 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4607
4608 return 0;
4609 }
4610
4611 /* Update the data in PCUM to advance over an argument
4612 of mode MODE and data type TYPE.
4613 (TYPE is null for libcalls where that information may not be available.) */
4614
4615 static void
4616 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4617 const_tree type, bool named)
4618 {
4619 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4620 {
4621 aapcs_layout_arg (pcum, mode, type, named);
4622
4623 if (pcum->aapcs_cprc_slot >= 0)
4624 {
4625 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4626 type);
4627 pcum->aapcs_cprc_slot = -1;
4628 }
4629
4630 /* Generic stuff. */
4631 pcum->aapcs_arg_processed = false;
4632 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4633 pcum->aapcs_reg = NULL_RTX;
4634 pcum->aapcs_partial = 0;
4635 }
4636 else
4637 {
4638 pcum->nargs += 1;
4639 if (arm_vector_mode_supported_p (mode)
4640 && pcum->named_count > pcum->nargs
4641 && TARGET_IWMMXT_ABI)
4642 pcum->iwmmxt_nregs += 1;
4643 else
4644 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4645 }
4646 }
4647
4648 /* Variable sized types are passed by reference. This is a GCC
4649 extension to the ARM ABI. */
4650
4651 static bool
4652 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4653 enum machine_mode mode ATTRIBUTE_UNUSED,
4654 const_tree type, bool named ATTRIBUTE_UNUSED)
4655 {
4656 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4657 }
4658 \f
4659 /* Encode the current state of the #pragma [no_]long_calls. */
4660 typedef enum
4661 {
4662 OFF, /* No #pragma [no_]long_calls is in effect. */
4663 LONG, /* #pragma long_calls is in effect. */
4664 SHORT /* #pragma no_long_calls is in effect. */
4665 } arm_pragma_enum;
4666
4667 static arm_pragma_enum arm_pragma_long_calls = OFF;
4668
4669 void
4670 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4671 {
4672 arm_pragma_long_calls = LONG;
4673 }
4674
4675 void
4676 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4677 {
4678 arm_pragma_long_calls = SHORT;
4679 }
4680
4681 void
4682 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4683 {
4684 arm_pragma_long_calls = OFF;
4685 }
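/* Typical source-level usage of these pragmas (an illustrative sketch):

       #pragma long_calls
       extern void far_away (void);     -- calls use the long sequence
       #pragma no_long_calls
       extern void nearby (void);       -- calls use the short sequence
       #pragma long_calls_off           -- revert to the command-line default
*/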
4686 \f
4687 /* Handle an attribute requiring a FUNCTION_DECL;
4688 arguments as in struct attribute_spec.handler. */
4689 static tree
4690 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4691 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4692 {
4693 if (TREE_CODE (*node) != FUNCTION_DECL)
4694 {
4695 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4696 name);
4697 *no_add_attrs = true;
4698 }
4699
4700 return NULL_TREE;
4701 }
4702
4703 /* Handle an "interrupt" or "isr" attribute;
4704 arguments as in struct attribute_spec.handler. */
4705 static tree
4706 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4707 bool *no_add_attrs)
4708 {
4709 if (DECL_P (*node))
4710 {
4711 if (TREE_CODE (*node) != FUNCTION_DECL)
4712 {
4713 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4714 name);
4715 *no_add_attrs = true;
4716 }
4717 /* FIXME: the argument, if any, is checked for type attributes;
4718 should it be checked for decl ones? */
4719 }
4720 else
4721 {
4722 if (TREE_CODE (*node) == FUNCTION_TYPE
4723 || TREE_CODE (*node) == METHOD_TYPE)
4724 {
4725 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4726 {
4727 warning (OPT_Wattributes, "%qE attribute ignored",
4728 name);
4729 *no_add_attrs = true;
4730 }
4731 }
4732 else if (TREE_CODE (*node) == POINTER_TYPE
4733 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4734 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4735 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4736 {
4737 *node = build_variant_type_copy (*node);
4738 TREE_TYPE (*node) = build_type_attribute_variant
4739 (TREE_TYPE (*node),
4740 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4741 *no_add_attrs = true;
4742 }
4743 else
4744 {
4745 /* Possibly pass this attribute on from the type to a decl. */
4746 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4747 | (int) ATTR_FLAG_FUNCTION_NEXT
4748 | (int) ATTR_FLAG_ARRAY_NEXT))
4749 {
4750 *no_add_attrs = true;
4751 return tree_cons (name, args, NULL_TREE);
4752 }
4753 else
4754 {
4755 warning (OPT_Wattributes, "%qE attribute ignored",
4756 name);
4757 }
4758 }
4759 }
4760
4761 return NULL_TREE;
4762 }
4763
4764 /* Handle a "pcs" attribute; arguments as in struct
4765 attribute_spec.handler. */
4766 static tree
4767 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4768 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4769 {
4770 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4771 {
4772 warning (OPT_Wattributes, "%qE attribute ignored", name);
4773 *no_add_attrs = true;
4774 }
4775 return NULL_TREE;
4776 }
4777
4778 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4779 /* Handle the "notshared" attribute. This attribute is another way of
4780 requesting hidden visibility. ARM's compiler supports
4781 "__declspec(notshared)"; we support the same thing via an
4782 attribute. */
4783
4784 static tree
4785 arm_handle_notshared_attribute (tree *node,
4786 tree name ATTRIBUTE_UNUSED,
4787 tree args ATTRIBUTE_UNUSED,
4788 int flags ATTRIBUTE_UNUSED,
4789 bool *no_add_attrs)
4790 {
4791 tree decl = TYPE_NAME (*node);
4792
4793 if (decl)
4794 {
4795 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4796 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4797 *no_add_attrs = false;
4798 }
4799 return NULL_TREE;
4800 }
4801 #endif
4802
4803 /* Return 0 if the attributes for two types are incompatible, 1 if they
4804 are compatible, and 2 if they are nearly compatible (which causes a
4805 warning to be generated). */
4806 static int
4807 arm_comp_type_attributes (const_tree type1, const_tree type2)
4808 {
4809 int l1, l2, s1, s2;
4810
4811 /* Check for mismatch of non-default calling convention. */
4812 if (TREE_CODE (type1) != FUNCTION_TYPE)
4813 return 1;
4814
4815 /* Check for mismatched call attributes. */
4816 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4817 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4818 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4819 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4820
4821 /* Only bother to check if an attribute is defined. */
4822 if (l1 | l2 | s1 | s2)
4823 {
4824 /* If one type has an attribute, the other must have the same attribute. */
4825 if ((l1 != l2) || (s1 != s2))
4826 return 0;
4827
4828 /* Disallow mixed attributes. */
4829 if ((l1 & s2) || (l2 & s1))
4830 return 0;
4831 }
4832
4833 /* Check for mismatched ISR attribute. */
4834 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4835 if (! l1)
4836 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4837 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4838 if (! l2)
4839 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4840 if (l1 != l2)
4841 return 0;
4842
4843 return 1;
4844 }
4845
4846 /* Assign default attributes to a newly defined type. This is used to
4847 set short_call/long_call attributes for function types of
4848 functions defined inside corresponding #pragma scopes. */
4849 static void
4850 arm_set_default_type_attributes (tree type)
4851 {
4852 /* Add __attribute__ ((long_call)) to all functions when inside
4853    #pragma long_calls, or __attribute__ ((short_call)) when inside
4854    #pragma no_long_calls. */
4855 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4856 {
4857 tree type_attr_list, attr_name;
4858 type_attr_list = TYPE_ATTRIBUTES (type);
4859
4860 if (arm_pragma_long_calls == LONG)
4861 attr_name = get_identifier ("long_call");
4862 else if (arm_pragma_long_calls == SHORT)
4863 attr_name = get_identifier ("short_call");
4864 else
4865 return;
4866
4867 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4868 TYPE_ATTRIBUTES (type) = type_attr_list;
4869 }
4870 }
4871 \f
4872 /* Return true if DECL is known to be linked into section SECTION. */
4873
4874 static bool
4875 arm_function_in_section_p (tree decl, section *section)
4876 {
4877 /* We can only be certain about functions defined in the same
4878 compilation unit. */
4879 if (!TREE_STATIC (decl))
4880 return false;
4881
4882 /* Make sure that SYMBOL always binds to the definition in this
4883 compilation unit. */
4884 if (!targetm.binds_local_p (decl))
4885 return false;
4886
4887 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4888 if (!DECL_SECTION_NAME (decl))
4889 {
4890 /* Make sure that we will not create a unique section for DECL. */
4891 if (flag_function_sections || DECL_ONE_ONLY (decl))
4892 return false;
4893 }
4894
4895 return function_section (decl) == section;
4896 }
4897
4898 /* Return nonzero if a 32-bit "long_call" should be generated for
4899 a call from the current function to DECL. We generate a long_call
4900 if the function:
4901
4902 a. has an __attribute__((long_call))
4903 or b. is within the scope of a #pragma long_calls
4904 or c. the -mlong-calls command line switch has been specified
4905
4906 However we do not generate a long call if the function:
4907
4908 d. has an __attribute__ ((short_call))
4909 or e. is inside the scope of a #pragma no_long_calls
4910 or f. is defined in the same section as the current function. */
4911
4912 bool
4913 arm_is_long_call_p (tree decl)
4914 {
4915 tree attrs;
4916
4917 if (!decl)
4918 return TARGET_LONG_CALLS;
4919
4920 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4921 if (lookup_attribute ("short_call", attrs))
4922 return false;
4923
4924 /* For "f", be conservative, and only cater for cases in which the
4925 whole of the current function is placed in the same section. */
4926 if (!flag_reorder_blocks_and_partition
4927 && TREE_CODE (decl) == FUNCTION_DECL
4928 && arm_function_in_section_p (decl, current_function_section ()))
4929 return false;
4930
4931 if (lookup_attribute ("long_call", attrs))
4932 return true;
4933
4934 return TARGET_LONG_CALLS;
4935 }
4936
4937 /* Return nonzero if it is ok to make a tail-call to DECL. */
4938 static bool
4939 arm_function_ok_for_sibcall (tree decl, tree exp)
4940 {
4941 unsigned long func_type;
4942
4943 if (cfun->machine->sibcall_blocked)
4944 return false;
4945
4946 /* Never tailcall something for which we have no decl, or if we
4947 are generating code for Thumb-1. */
4948 if (decl == NULL || TARGET_THUMB1)
4949 return false;
4950
4951 /* The PIC register is live on entry to VxWorks PLT entries, so we
4952 must make the call before restoring the PIC register. */
4953 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4954 return false;
4955
4956 /* Cannot tail-call to long calls, since these are out of range of
4957 a branch instruction. */
4958 if (arm_is_long_call_p (decl))
4959 return false;
4960
4961 /* If we are interworking and the function is not declared static
4962 then we can't tail-call it unless we know that it exists in this
4963 compilation unit (since it might be a Thumb routine). */
4964 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4965 return false;
4966
4967 func_type = arm_current_func_type ();
4968 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4969 if (IS_INTERRUPT (func_type))
4970 return false;
4971
4972 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4973 {
4974 /* Check that the return value locations are the same. For
4975 example that we aren't returning a value from the sibling in
4976 a VFP register but then need to transfer it to a core
4977 register. */
4978 rtx a, b;
4979
4980 a = arm_function_value (TREE_TYPE (exp), decl, false);
4981 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4982 cfun->decl, false);
4983 if (!rtx_equal_p (a, b))
4984 return false;
4985 }
4986
4987 /* Never tailcall if function may be called with a misaligned SP. */
4988 if (IS_STACKALIGN (func_type))
4989 return false;
4990
4991 /* Everything else is ok. */
4992 return true;
4993 }
4994
4995 \f
4996 /* Addressing mode support functions. */
4997
4998 /* Return nonzero if X is a legitimate immediate operand when compiling
4999 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5000 int
5001 legitimate_pic_operand_p (rtx x)
5002 {
5003 if (GET_CODE (x) == SYMBOL_REF
5004 || (GET_CODE (x) == CONST
5005 && GET_CODE (XEXP (x, 0)) == PLUS
5006 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5007 return 0;
5008
5009 return 1;
5010 }
5011
5012 /* Record that the current function needs a PIC register. Initialize
5013 cfun->machine->pic_reg if we have not already done so. */
5014
5015 static void
5016 require_pic_register (void)
5017 {
5018 /* A lot of the logic here is made obscure by the fact that this
5019 routine gets called as part of the rtx cost estimation process.
5020 We don't want those calls to affect any assumptions about the real
5021 function; and further, we can't call entry_of_function() until we
5022 start the real expansion process. */
5023 if (!crtl->uses_pic_offset_table)
5024 {
5025 gcc_assert (can_create_pseudo_p ());
5026 if (arm_pic_register != INVALID_REGNUM)
5027 {
5028 if (!cfun->machine->pic_reg)
5029 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5030
5031 /* Play games to avoid marking the function as needing pic
5032 if we are being called as part of the cost-estimation
5033 process. */
5034 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5035 crtl->uses_pic_offset_table = 1;
5036 }
5037 else
5038 {
5039 rtx seq;
5040
5041 if (!cfun->machine->pic_reg)
5042 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5043
5044 /* Play games to avoid marking the function as needing pic
5045 if we are being called as part of the cost-estimation
5046 process. */
5047 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5048 {
5049 crtl->uses_pic_offset_table = 1;
5050 start_sequence ();
5051
5052 arm_load_pic_register (0UL);
5053
5054 seq = get_insns ();
5055 end_sequence ();
5056 /* We can be called during expansion of PHI nodes, where
5057 we can't yet emit instructions directly in the final
5058 insn stream. Queue the insns on the entry edge, they will
5059 be committed after everything else is expanded. */
5060 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5061 }
5062 }
5063 }
5064 }
5065
5066 rtx
5067 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5068 {
5069 if (GET_CODE (orig) == SYMBOL_REF
5070 || GET_CODE (orig) == LABEL_REF)
5071 {
5072 rtx insn;
5073
5074 if (reg == 0)
5075 {
5076 gcc_assert (can_create_pseudo_p ());
5077 reg = gen_reg_rtx (Pmode);
5078 }
5079
5080 /* VxWorks does not impose a fixed gap between segments; the run-time
5081 gap can be different from the object-file gap. We therefore can't
5082 use GOTOFF unless we are absolutely sure that the symbol is in the
5083 same segment as the GOT. Unfortunately, the flexibility of linker
5084 scripts means that we can't be sure of that in general, so assume
5085 that GOTOFF is never valid on VxWorks. */
5086 if ((GET_CODE (orig) == LABEL_REF
5087 || (GET_CODE (orig) == SYMBOL_REF
5088     && SYMBOL_REF_LOCAL_P (orig)))
5089 && NEED_GOT_RELOC
5090 && !TARGET_VXWORKS_RTP)
5091 insn = arm_pic_static_addr (orig, reg);
5092 else
5093 {
5094 rtx pat;
5095 rtx mem;
5096
5097 /* If this function doesn't have a pic register, create one now. */
5098 require_pic_register ();
5099
5100 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5101
5102 /* Make the MEM as close to a constant as possible. */
5103 mem = SET_SRC (pat);
5104 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5105 MEM_READONLY_P (mem) = 1;
5106 MEM_NOTRAP_P (mem) = 1;
5107
5108 insn = emit_insn (pat);
5109 }
5110
5111 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5112 by loop. */
5113 set_unique_reg_note (insn, REG_EQUAL, orig);
5114
5115 return reg;
5116 }
5117 else if (GET_CODE (orig) == CONST)
5118 {
5119 rtx base, offset;
5120
5121 if (GET_CODE (XEXP (orig, 0)) == PLUS
5122 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5123 return orig;
5124
5125 /* Handle the case where we have: const (UNSPEC_TLS). */
5126 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5127 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5128 return orig;
5129
5130 /* Handle the case where we have:
5131 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5132 CONST_INT. */
5133 if (GET_CODE (XEXP (orig, 0)) == PLUS
5134 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5135 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5136 {
5137 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5138 return orig;
5139 }
5140
5141 if (reg == 0)
5142 {
5143 gcc_assert (can_create_pseudo_p ());
5144 reg = gen_reg_rtx (Pmode);
5145 }
5146
5147 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5148
5149 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5150 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5151 base == reg ? 0 : reg);
5152
5153 if (GET_CODE (offset) == CONST_INT)
5154 {
5155 /* The base register doesn't really matter, we only want to
5156 test the index for the appropriate mode. */
5157 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5158 {
5159 gcc_assert (can_create_pseudo_p ());
5160 offset = force_reg (Pmode, offset);
5161 }
5162
5163 if (GET_CODE (offset) == CONST_INT)
5164 return plus_constant (base, INTVAL (offset));
5165 }
5166
5167 if (GET_MODE_SIZE (mode) > 4
5168 && (GET_MODE_CLASS (mode) == MODE_INT
5169 || TARGET_SOFT_FLOAT))
5170 {
5171 emit_insn (gen_addsi3 (reg, base, offset));
5172 return reg;
5173 }
5174
5175 return gen_rtx_PLUS (Pmode, base, offset);
5176 }
5177
5178 return orig;
5179 }
5180
5181
5182 /* Find a spare register to use during the prolog of a function. */
5183
5184 static int
5185 thumb_find_work_register (unsigned long pushed_regs_mask)
5186 {
5187 int reg;
5188
5189 /* Check the argument registers first as these are call-used. The
5190 register allocation order means that sometimes r3 might be used
5191 but earlier argument registers might not, so check them all. */
5192 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5193 if (!df_regs_ever_live_p (reg))
5194 return reg;
5195
5196 /* Before going on to check the call-saved registers we can try a couple
5197 more ways of deducing that r3 is available. The first is when we are
5198 pushing anonymous arguments onto the stack and we have less than 4
5199 registers worth of fixed arguments(*). In this case r3 will be part of
5200 the variable argument list and so we can be sure that it will be
5201 pushed right at the start of the function. Hence it will be available
5202 for the rest of the prologue.
5203 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5204 if (cfun->machine->uses_anonymous_args
5205 && crtl->args.pretend_args_size > 0)
5206 return LAST_ARG_REGNUM;
5207
5208 /* The other case is when we have fixed arguments but less than 4 registers
5209 worth. In this case r3 might be used in the body of the function, but
5210 it is not being used to convey an argument into the function. In theory
5211 we could just check crtl->args.size to see how many bytes are
5212 being passed in argument registers, but it seems that it is unreliable.
5213 Sometimes it will have the value 0 when in fact arguments are being
5214 passed. (See testcase execute/20021111-1.c for an example). So we also
5215 check the args_info.nregs field as well. The problem with this field is
5216 that it makes no allowances for arguments that are passed to the
5217 function but which are not used. Hence we could miss an opportunity
5218 when a function has an unused argument in r3. But it is better to be
5219 safe than to be sorry. */
5220 if (! cfun->machine->uses_anonymous_args
5221 && crtl->args.size >= 0
5222 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5223 && crtl->args.info.nregs < 4)
5224 return LAST_ARG_REGNUM;
5225
5226 /* Otherwise look for a call-saved register that is going to be pushed. */
5227 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5228 if (pushed_regs_mask & (1 << reg))
5229 return reg;
5230
5231 if (TARGET_THUMB2)
5232 {
5233 /* Thumb-2 can use high regs. */
5234 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5235 if (pushed_regs_mask & (1 << reg))
5236 return reg;
5237 }
5238 /* Something went wrong - thumb_compute_save_reg_mask()
5239 should have arranged for a suitable register to be pushed. */
5240 gcc_unreachable ();
5241 }
5242
5243 static GTY(()) int pic_labelno;
5244
5245 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5246 low register. */
5247
5248 void
5249 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5250 {
5251 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5252
5253 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5254 return;
5255
5256 gcc_assert (flag_pic);
5257
5258 pic_reg = cfun->machine->pic_reg;
5259 if (TARGET_VXWORKS_RTP)
5260 {
5261 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5262 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5263 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5264
5265 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5266
5267 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5268 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5269 }
5270 else
5271 {
5272 /* We use an UNSPEC rather than a LABEL_REF because this label
5273 never appears in the code stream. */
5274
5275 labelno = GEN_INT (pic_labelno++);
5276 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5277 l1 = gen_rtx_CONST (VOIDmode, l1);
5278
5279 /* On the ARM the PC register contains 'dot + 8' at the time of the
5280 addition, on the Thumb it is 'dot + 4'. */
5281 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5282 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5283 UNSPEC_GOTSYM_OFF);
5284 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5285
5286 if (TARGET_32BIT)
5287 {
5288 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5289 if (TARGET_ARM)
5290 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5291 else
5292 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5293 }
5294 else /* TARGET_THUMB1 */
5295 {
5296 if (arm_pic_register != INVALID_REGNUM
5297 && REGNO (pic_reg) > LAST_LO_REGNUM)
5298 {
5299 /* We will have pushed the pic register, so we should always be
5300 able to find a work register. */
5301 pic_tmp = gen_rtx_REG (SImode,
5302 thumb_find_work_register (saved_regs));
5303 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5304 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5305 }
5306 else
5307 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5308 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5309 }
5310 }
5311
5312 /* Need to emit this whether or not we obey regdecls,
5313 since setjmp/longjmp can cause life info to screw up. */
5314 emit_use (pic_reg);
5315 }
5316
5317 /* Generate code to load the address of a static var when flag_pic is set. */
5318 static rtx
5319 arm_pic_static_addr (rtx orig, rtx reg)
5320 {
5321 rtx l1, labelno, offset_rtx, insn;
5322
5323 gcc_assert (flag_pic);
5324
5325 /* We use an UNSPEC rather than a LABEL_REF because this label
5326 never appears in the code stream. */
5327 labelno = GEN_INT (pic_labelno++);
5328 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5329 l1 = gen_rtx_CONST (VOIDmode, l1);
5330
5331 /* On the ARM the PC register contains 'dot + 8' at the time of the
5332 addition, on the Thumb it is 'dot + 4'. */
5333 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5334 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5335 UNSPEC_SYMBOL_OFFSET);
5336 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5337
5338 if (TARGET_32BIT)
5339 {
5340 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5341 if (TARGET_ARM)
5342 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5343 else
5344 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5345 }
5346 else /* TARGET_THUMB1 */
5347 {
5348 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5349 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5350 }
5351
5352 return insn;
5353 }
5354
5355 /* Return nonzero if X is valid as an ARM state addressing register. */
5356 static int
5357 arm_address_register_rtx_p (rtx x, int strict_p)
5358 {
5359 int regno;
5360
5361 if (GET_CODE (x) != REG)
5362 return 0;
5363
5364 regno = REGNO (x);
5365
5366 if (strict_p)
5367 return ARM_REGNO_OK_FOR_BASE_P (regno);
5368
5369 return (regno <= LAST_ARM_REGNUM
5370 || regno >= FIRST_PSEUDO_REGISTER
5371 || regno == FRAME_POINTER_REGNUM
5372 || regno == ARG_POINTER_REGNUM);
5373 }
5374
5375 /* Return TRUE if this rtx is the difference of a symbol and a label,
5376 and will reduce to a PC-relative relocation in the object file.
5377 Expressions like this can be left alone when generating PIC, rather
5378 than forced through the GOT. */
5379 static int
5380 pcrel_constant_p (rtx x)
5381 {
5382 if (GET_CODE (x) == MINUS)
5383 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5384
5385 return FALSE;
5386 }
5387
5388 /* Return true if X will surely end up in an index register after next
5389 splitting pass. */
5390 static bool
5391 will_be_in_index_register (const_rtx x)
5392 {
5393 /* arm.md: calculate_pic_address will split this into a register. */
5394 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5395 }
5396
5397 /* Return nonzero if X is a valid ARM state address operand. */
5398 int
5399 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5400 int strict_p)
5401 {
5402 bool use_ldrd;
5403 enum rtx_code code = GET_CODE (x);
5404
5405 if (arm_address_register_rtx_p (x, strict_p))
5406 return 1;
5407
5408 use_ldrd = (TARGET_LDRD
5409 && (mode == DImode
5410 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5411
5412 if (code == POST_INC || code == PRE_DEC
5413 || ((code == PRE_INC || code == POST_DEC)
5414 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5415 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5416
5417 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5418 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5419 && GET_CODE (XEXP (x, 1)) == PLUS
5420 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5421 {
5422 rtx addend = XEXP (XEXP (x, 1), 1);
5423
5424 /* Don't allow ldrd post increment by register because it's hard
5425 to fixup invalid register choices. */
5426 if (use_ldrd
5427 && GET_CODE (x) == POST_MODIFY
5428 && GET_CODE (addend) == REG)
5429 return 0;
5430
5431 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5432 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5433 }
5434
5435 /* After reload constants split into minipools will have addresses
5436 from a LABEL_REF. */
5437 else if (reload_completed
5438 && (code == LABEL_REF
5439 || (code == CONST
5440 && GET_CODE (XEXP (x, 0)) == PLUS
5441 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5442 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5443 return 1;
5444
5445 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5446 return 0;
5447
5448 else if (code == PLUS)
5449 {
5450 rtx xop0 = XEXP (x, 0);
5451 rtx xop1 = XEXP (x, 1);
5452
5453 return ((arm_address_register_rtx_p (xop0, strict_p)
5454 && ((GET_CODE(xop1) == CONST_INT
5455 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5456 || (!strict_p && will_be_in_index_register (xop1))))
5457 || (arm_address_register_rtx_p (xop1, strict_p)
5458 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5459 }
5460
5461 #if 0
5462 /* Reload currently can't handle MINUS, so disable this for now */
5463 else if (GET_CODE (x) == MINUS)
5464 {
5465 rtx xop0 = XEXP (x, 0);
5466 rtx xop1 = XEXP (x, 1);
5467
5468 return (arm_address_register_rtx_p (xop0, strict_p)
5469 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5470 }
5471 #endif
5472
5473 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5474 && code == SYMBOL_REF
5475 && CONSTANT_POOL_ADDRESS_P (x)
5476 && ! (flag_pic
5477 && symbol_mentioned_p (get_pool_constant (x))
5478 && ! pcrel_constant_p (get_pool_constant (x))))
5479 return 1;
5480
5481 return 0;
5482 }
5483
5484 /* Return nonzero if X is a valid Thumb-2 address operand. */
5485 static int
5486 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5487 {
5488 bool use_ldrd;
5489 enum rtx_code code = GET_CODE (x);
5490
5491 if (arm_address_register_rtx_p (x, strict_p))
5492 return 1;
5493
5494 use_ldrd = (TARGET_LDRD
5495 && (mode == DImode
5496 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5497
5498 if (code == POST_INC || code == PRE_DEC
5499 || ((code == PRE_INC || code == POST_DEC)
5500 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5501 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5502
5503 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5504 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5505 && GET_CODE (XEXP (x, 1)) == PLUS
5506 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5507 {
5508 /* Thumb-2 only has autoincrement by constant. */
5509 rtx addend = XEXP (XEXP (x, 1), 1);
5510 HOST_WIDE_INT offset;
5511
5512 if (GET_CODE (addend) != CONST_INT)
5513 return 0;
5514
5515 offset = INTVAL(addend);
5516 if (GET_MODE_SIZE (mode) <= 4)
5517 return (offset > -256 && offset < 256);
5518
5519 return (use_ldrd && offset > -1024 && offset < 1024
5520 && (offset & 3) == 0);
5521 }
5522
5523 /* After reload constants split into minipools will have addresses
5524 from a LABEL_REF. */
5525 else if (reload_completed
5526 && (code == LABEL_REF
5527 || (code == CONST
5528 && GET_CODE (XEXP (x, 0)) == PLUS
5529 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5530 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5531 return 1;
5532
5533 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5534 return 0;
5535
5536 else if (code == PLUS)
5537 {
5538 rtx xop0 = XEXP (x, 0);
5539 rtx xop1 = XEXP (x, 1);
5540
5541 return ((arm_address_register_rtx_p (xop0, strict_p)
5542 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5543 || (!strict_p && will_be_in_index_register (xop1))))
5544 || (arm_address_register_rtx_p (xop1, strict_p)
5545 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5546 }
5547
5548 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5549 && code == SYMBOL_REF
5550 && CONSTANT_POOL_ADDRESS_P (x)
5551 && ! (flag_pic
5552 && symbol_mentioned_p (get_pool_constant (x))
5553 && ! pcrel_constant_p (get_pool_constant (x))))
5554 return 1;
5555
5556 return 0;
5557 }
5558
5559 /* Return nonzero if INDEX is valid for an address index operand in
5560 ARM state. */
5561 static int
5562 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5563 int strict_p)
5564 {
5565 HOST_WIDE_INT range;
5566 enum rtx_code code = GET_CODE (index);
5567
5568 /* Standard coprocessor addressing modes. */
5569 if (TARGET_HARD_FLOAT
5570 && (TARGET_FPA || TARGET_MAVERICK)
5571 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5572 || (TARGET_MAVERICK && mode == DImode)))
5573 return (code == CONST_INT && INTVAL (index) < 1024
5574 && INTVAL (index) > -1024
5575 && (INTVAL (index) & 3) == 0);
5576
5577 if (TARGET_NEON
5578 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5579 return (code == CONST_INT
5580 && INTVAL (index) < 1016
5581 && INTVAL (index) > -1024
5582 && (INTVAL (index) & 3) == 0);
5583
5584 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5585 return (code == CONST_INT
5586 && INTVAL (index) < 1024
5587 && INTVAL (index) > -1024
5588 && (INTVAL (index) & 3) == 0);
5589
5590 if (arm_address_register_rtx_p (index, strict_p)
5591 && (GET_MODE_SIZE (mode) <= 4))
5592 return 1;
5593
5594 if (mode == DImode || mode == DFmode)
5595 {
5596 if (code == CONST_INT)
5597 {
5598 HOST_WIDE_INT val = INTVAL (index);
5599
5600 if (TARGET_LDRD)
5601 return val > -256 && val < 256;
5602 else
5603 return val > -4096 && val < 4092;
5604 }
5605
5606 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5607 }
5608
5609 if (GET_MODE_SIZE (mode) <= 4
5610 && ! (arm_arch4
5611 && (mode == HImode
5612 || mode == HFmode
5613 || (mode == QImode && outer == SIGN_EXTEND))))
5614 {
5615 if (code == MULT)
5616 {
5617 rtx xiop0 = XEXP (index, 0);
5618 rtx xiop1 = XEXP (index, 1);
5619
5620 return ((arm_address_register_rtx_p (xiop0, strict_p)
5621 && power_of_two_operand (xiop1, SImode))
5622 || (arm_address_register_rtx_p (xiop1, strict_p)
5623 && power_of_two_operand (xiop0, SImode)));
5624 }
5625 else if (code == LSHIFTRT || code == ASHIFTRT
5626 || code == ASHIFT || code == ROTATERT)
5627 {
5628 rtx op = XEXP (index, 1);
5629
5630 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5631 && GET_CODE (op) == CONST_INT
5632 && INTVAL (op) > 0
5633 && INTVAL (op) <= 31);
5634 }
5635 }
5636
5637 /* For ARM v4 we may be doing a sign-extend operation during the
5638 load. */
5639 if (arm_arch4)
5640 {
5641 if (mode == HImode
5642 || mode == HFmode
5643 || (outer == SIGN_EXTEND && mode == QImode))
5644 range = 256;
5645 else
5646 range = 4096;
5647 }
5648 else
5649 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5650
5651 return (code == CONST_INT
5652 && INTVAL (index) < range
5653 && INTVAL (index) > -range);
5654 }
5655
5656 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5657    index operand, i.e. 1, 2, 4 or 8.  */
5658 static bool
5659 thumb2_index_mul_operand (rtx op)
5660 {
5661 HOST_WIDE_INT val;
5662
5663 if (GET_CODE(op) != CONST_INT)
5664 return false;
5665
5666 val = INTVAL(op);
5667 return (val == 1 || val == 2 || val == 4 || val == 8);
5668 }
5669
5670 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5671 static int
5672 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5673 {
5674 enum rtx_code code = GET_CODE (index);
5675
5676 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5677 /* Standard coprocessor addressing modes. */
5678 if (TARGET_HARD_FLOAT
5679 && (TARGET_FPA || TARGET_MAVERICK)
5680 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5681 || (TARGET_MAVERICK && mode == DImode)))
5682 return (code == CONST_INT && INTVAL (index) < 1024
5683 && INTVAL (index) > -1024
5684 && (INTVAL (index) & 3) == 0);
5685
5686 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5687 {
5688 /* For DImode assume values will usually live in core regs
5689 and only allow LDRD addressing modes. */
5690 if (!TARGET_LDRD || mode != DImode)
5691 return (code == CONST_INT
5692 && INTVAL (index) < 1024
5693 && INTVAL (index) > -1024
5694 && (INTVAL (index) & 3) == 0);
5695 }
5696
5697 if (TARGET_NEON
5698 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5699 return (code == CONST_INT
5700 && INTVAL (index) < 1016
5701 && INTVAL (index) > -1024
5702 && (INTVAL (index) & 3) == 0);
5703
5704 if (arm_address_register_rtx_p (index, strict_p)
5705 && (GET_MODE_SIZE (mode) <= 4))
5706 return 1;
5707
5708 if (mode == DImode || mode == DFmode)
5709 {
5710 if (code == CONST_INT)
5711 {
5712 HOST_WIDE_INT val = INTVAL (index);
5713 /* ??? Can we assume ldrd for thumb2? */
5714 /* Thumb-2 ldrd only has reg+const addressing modes. */
5715 /* ldrd supports offsets of +-1020.
5716 However the ldr fallback does not. */
5717 return val > -256 && val < 256 && (val & 3) == 0;
5718 }
5719 else
5720 return 0;
5721 }
5722
5723 if (code == MULT)
5724 {
5725 rtx xiop0 = XEXP (index, 0);
5726 rtx xiop1 = XEXP (index, 1);
5727
5728 return ((arm_address_register_rtx_p (xiop0, strict_p)
5729 && thumb2_index_mul_operand (xiop1))
5730 || (arm_address_register_rtx_p (xiop1, strict_p)
5731 && thumb2_index_mul_operand (xiop0)));
5732 }
5733 else if (code == ASHIFT)
5734 {
5735 rtx op = XEXP (index, 1);
5736
5737 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5738 && GET_CODE (op) == CONST_INT
5739 && INTVAL (op) > 0
5740 && INTVAL (op) <= 3);
5741 }
5742
5743 return (code == CONST_INT
5744 && INTVAL (index) < 4096
5745 && INTVAL (index) > -256);
5746 }
5747
5748 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5749 static int
5750 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5751 {
5752 int regno;
5753
5754 if (GET_CODE (x) != REG)
5755 return 0;
5756
5757 regno = REGNO (x);
5758
5759 if (strict_p)
5760 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5761
5762 return (regno <= LAST_LO_REGNUM
5763 || regno > LAST_VIRTUAL_REGISTER
5764 || regno == FRAME_POINTER_REGNUM
5765 || (GET_MODE_SIZE (mode) >= 4
5766 && (regno == STACK_POINTER_REGNUM
5767 || regno >= FIRST_PSEUDO_REGISTER
5768 || x == hard_frame_pointer_rtx
5769 || x == arg_pointer_rtx)));
5770 }
5771
5772 /* Return nonzero if x is a legitimate index register. This is the case
5773 for any base register that can access a QImode object. */
5774 inline static int
5775 thumb1_index_register_rtx_p (rtx x, int strict_p)
5776 {
5777 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5778 }
5779
5780 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5781
5782 The AP may be eliminated to either the SP or the FP, so we use the
5783    least common denominator, i.e. SImode, and offsets from 0 to 64.
5784
5785 ??? Verify whether the above is the right approach.
5786
5787 ??? Also, the FP may be eliminated to the SP, so perhaps that
5788 needs special handling also.
5789
5790 ??? Look at how the mips16 port solves this problem. It probably uses
5791 better ways to solve some of these problems.
5792
5793 Although it is not incorrect, we don't accept QImode and HImode
5794 addresses based on the frame pointer or arg pointer until the
5795 reload pass starts. This is so that eliminating such addresses
5796 into stack based ones won't produce impossible code. */
5797 static int
5798 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5799 {
5800 /* ??? Not clear if this is right. Experiment. */
5801 if (GET_MODE_SIZE (mode) < 4
5802 && !(reload_in_progress || reload_completed)
5803 && (reg_mentioned_p (frame_pointer_rtx, x)
5804 || reg_mentioned_p (arg_pointer_rtx, x)
5805 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5806 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5807 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5808 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5809 return 0;
5810
5811 /* Accept any base register. SP only in SImode or larger. */
5812 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5813 return 1;
5814
5815 /* This is PC relative data before arm_reorg runs. */
5816 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5817 && GET_CODE (x) == SYMBOL_REF
5818 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5819 return 1;
5820
5821 /* This is PC relative data after arm_reorg runs. */
5822 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5823 && reload_completed
5824 && (GET_CODE (x) == LABEL_REF
5825 || (GET_CODE (x) == CONST
5826 && GET_CODE (XEXP (x, 0)) == PLUS
5827 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5828 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5829 return 1;
5830
5831 /* Post-inc indexing only supported for SImode and larger. */
5832 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5833 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5834 return 1;
5835
5836 else if (GET_CODE (x) == PLUS)
5837 {
5838 /* REG+REG address can be any two index registers. */
5839 /* We disallow FRAME+REG addressing since we know that FRAME
5840 will be replaced with STACK, and SP relative addressing only
5841 permits SP+OFFSET. */
5842 if (GET_MODE_SIZE (mode) <= 4
5843 && XEXP (x, 0) != frame_pointer_rtx
5844 && XEXP (x, 1) != frame_pointer_rtx
5845 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5846 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5847 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5848 return 1;
5849
5850 /* REG+const has 5-7 bit offset for non-SP registers. */
5851 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5852 || XEXP (x, 0) == arg_pointer_rtx)
5853 && GET_CODE (XEXP (x, 1)) == CONST_INT
5854 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5855 return 1;
5856
5857 /* REG+const has 10-bit offset for SP, but only SImode and
5858 larger is supported. */
5859 /* ??? Should probably check for DI/DFmode overflow here
5860 just like GO_IF_LEGITIMATE_OFFSET does. */
5861 else if (GET_CODE (XEXP (x, 0)) == REG
5862 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5863 && GET_MODE_SIZE (mode) >= 4
5864 && GET_CODE (XEXP (x, 1)) == CONST_INT
5865 && INTVAL (XEXP (x, 1)) >= 0
5866 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5867 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5868 return 1;
5869
5870 else if (GET_CODE (XEXP (x, 0)) == REG
5871 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5872 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5873 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5874 && REGNO (XEXP (x, 0))
5875 <= LAST_VIRTUAL_POINTER_REGISTER))
5876 && GET_MODE_SIZE (mode) >= 4
5877 && GET_CODE (XEXP (x, 1)) == CONST_INT
5878 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5879 return 1;
5880 }
5881
5882 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5883 && GET_MODE_SIZE (mode) == 4
5884 && GET_CODE (x) == SYMBOL_REF
5885 && CONSTANT_POOL_ADDRESS_P (x)
5886 && ! (flag_pic
5887 && symbol_mentioned_p (get_pool_constant (x))
5888 && ! pcrel_constant_p (get_pool_constant (x))))
5889 return 1;
5890
5891 return 0;
5892 }
5893
5894 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5895 instruction of mode MODE. */
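/* Concretely, the checks below allow byte accesses at offsets 0..31,
   halfword accesses at even offsets 0..62, and word (or larger) accesses at
   multiples of four from 0 up to 128 minus the access size, e.g. 0..124 for
   SImode.  */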
5896 int
5897 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5898 {
5899 switch (GET_MODE_SIZE (mode))
5900 {
5901 case 1:
5902 return val >= 0 && val < 32;
5903
5904 case 2:
5905 return val >= 0 && val < 64 && (val & 1) == 0;
5906
5907 default:
5908 return (val >= 0
5909 && (val + GET_MODE_SIZE (mode)) <= 128
5910 && (val & 3) == 0);
5911 }
5912 }
5913
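/* Return TRUE if X is a valid address of mode MODE for the current target,
   dispatching to the ARM, Thumb-2 or Thumb-1 checker as appropriate.  */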
5914 bool
5915 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5916 {
5917 if (TARGET_ARM)
5918 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5919 else if (TARGET_THUMB2)
5920 return thumb2_legitimate_address_p (mode, x, strict_p);
5921 else /* if (TARGET_THUMB1) */
5922 return thumb1_legitimate_address_p (mode, x, strict_p);
5923 }
5924
5925 /* Build the SYMBOL_REF for __tls_get_addr. */
5926
5927 static GTY(()) rtx tls_get_addr_libfunc;
5928
5929 static rtx
5930 get_tls_get_addr (void)
5931 {
5932 if (!tls_get_addr_libfunc)
5933 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5934 return tls_get_addr_libfunc;
5935 }
5936
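/* Load the thread pointer into TARGET, or into a new pseudo if TARGET is
   NULL, and return the register holding it.  */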
5937 static rtx
5938 arm_load_tp (rtx target)
5939 {
5940 if (!target)
5941 target = gen_reg_rtx (SImode);
5942
5943 if (TARGET_HARD_TP)
5944 {
5945 /* Can return in any reg. */
5946 emit_insn (gen_load_tp_hard (target));
5947 }
5948 else
5949 {
5950       /* The result is always returned in r0.  Immediately copy it into a
5951 	 pseudo; otherwise other uses of r0 (e.g. setting up function arguments)
5952 	 may clobber the value.  */
5953
5954 rtx tmp;
5955
5956 emit_insn (gen_load_tp_soft ());
5957
5958 tmp = gen_rtx_REG (SImode, 0);
5959 emit_move_insn (target, tmp);
5960 }
5961 return target;
5962 }
5963
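/* Wrap the TLS operand X in a CONST and force it into REG, allocating a new
   pseudo if REG is NULL; return the register.  */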
5964 static rtx
5965 load_tls_operand (rtx x, rtx reg)
5966 {
5967 rtx tmp;
5968
5969 if (reg == NULL_RTX)
5970 reg = gen_reg_rtx (SImode);
5971
5972 tmp = gen_rtx_CONST (SImode, x);
5973
5974 emit_move_insn (reg, tmp);
5975
5976 return reg;
5977 }
5978
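/* Emit, as a saved insn sequence, a PIC-style computation of the TLS offset
   of X using relocation RELOC, followed by a call to __tls_get_addr.  The
   call's result rtx is stored in *VALUEP and the sequence is returned.  */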
5979 static rtx
5980 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5981 {
5982 rtx insns, label, labelno, sum;
5983
5984 start_sequence ();
5985
5986 labelno = GEN_INT (pic_labelno++);
5987 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5988 label = gen_rtx_CONST (VOIDmode, label);
5989
5990 sum = gen_rtx_UNSPEC (Pmode,
5991 gen_rtvec (4, x, GEN_INT (reloc), label,
5992 GEN_INT (TARGET_ARM ? 8 : 4)),
5993 UNSPEC_TLS);
5994 reg = load_tls_operand (sum, reg);
5995
5996 if (TARGET_ARM)
5997 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5998 else if (TARGET_THUMB2)
5999 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6000 else /* TARGET_THUMB1 */
6001 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6002
6003 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6004 Pmode, 1, reg, Pmode);
6005
6006 insns = get_insns ();
6007 end_sequence ();
6008
6009 return insns;
6010 }
6011
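/* Expand a reference to the TLS symbol X into a legitimate address, using
   REG as a scratch if it is non-NULL.  The code emitted depends on the TLS
   model of X (global/local dynamic or initial/local exec).  */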
6012 rtx
6013 legitimize_tls_address (rtx x, rtx reg)
6014 {
6015 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6016 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6017
6018 switch (model)
6019 {
6020 case TLS_MODEL_GLOBAL_DYNAMIC:
6021 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6022 dest = gen_reg_rtx (Pmode);
6023 emit_libcall_block (insns, dest, ret, x);
6024 return dest;
6025
6026 case TLS_MODEL_LOCAL_DYNAMIC:
6027 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6028
6029 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6030 share the LDM result with other LD model accesses. */
6031 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6032 UNSPEC_TLS);
6033 dest = gen_reg_rtx (Pmode);
6034 emit_libcall_block (insns, dest, ret, eqv);
6035
6036 /* Load the addend. */
6037 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6038 UNSPEC_TLS);
6039 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6040 return gen_rtx_PLUS (Pmode, dest, addend);
6041
6042 case TLS_MODEL_INITIAL_EXEC:
6043 labelno = GEN_INT (pic_labelno++);
6044 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6045 label = gen_rtx_CONST (VOIDmode, label);
6046 sum = gen_rtx_UNSPEC (Pmode,
6047 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6048 GEN_INT (TARGET_ARM ? 8 : 4)),
6049 UNSPEC_TLS);
6050 reg = load_tls_operand (sum, reg);
6051
6052 if (TARGET_ARM)
6053 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6054 else if (TARGET_THUMB2)
6055 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6056 else
6057 {
6058 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6059 emit_move_insn (reg, gen_const_mem (SImode, reg));
6060 }
6061
6062 tp = arm_load_tp (NULL_RTX);
6063
6064 return gen_rtx_PLUS (Pmode, tp, reg);
6065
6066 case TLS_MODEL_LOCAL_EXEC:
6067 tp = arm_load_tp (NULL_RTX);
6068
6069 reg = gen_rtx_UNSPEC (Pmode,
6070 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6071 UNSPEC_TLS);
6072 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6073
6074 return gen_rtx_PLUS (Pmode, tp, reg);
6075
6076 default:
6077 abort ();
6078 }
6079 }
6080
6081 /* Try machine-dependent ways of modifying an illegitimate address
6082 to be legitimate. If we find one, return the new, valid address. */
6083 rtx
6084 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6085 {
6086 if (!TARGET_ARM)
6087 {
6088 /* TODO: legitimize_address for Thumb2. */
6089 if (TARGET_THUMB2)
6090 return x;
6091 return thumb_legitimize_address (x, orig_x, mode);
6092 }
6093
6094 if (arm_tls_symbol_p (x))
6095 return legitimize_tls_address (x, NULL_RTX);
6096
6097 if (GET_CODE (x) == PLUS)
6098 {
6099 rtx xop0 = XEXP (x, 0);
6100 rtx xop1 = XEXP (x, 1);
6101
6102 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6103 xop0 = force_reg (SImode, xop0);
6104
6105 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6106 xop1 = force_reg (SImode, xop1);
6107
6108 if (ARM_BASE_REGISTER_RTX_P (xop0)
6109 && GET_CODE (xop1) == CONST_INT)
6110 {
6111 HOST_WIDE_INT n, low_n;
6112 rtx base_reg, val;
6113 n = INTVAL (xop1);
6114
6115 /* VFP addressing modes actually allow greater offsets, but for
6116 now we just stick with the lowest common denominator. */
6117 if (mode == DImode
6118 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6119 {
6120 low_n = n & 0x0f;
6121 n &= ~0x0f;
6122 if (low_n > 4)
6123 {
6124 n += 16;
6125 low_n -= 16;
6126 }
6127 }
6128 else
6129 {
6130 low_n = ((mode) == TImode ? 0
6131 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6132 n -= low_n;
6133 }
6134
6135 base_reg = gen_reg_rtx (SImode);
6136 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6137 emit_move_insn (base_reg, val);
6138 x = plus_constant (base_reg, low_n);
6139 }
6140 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6141 x = gen_rtx_PLUS (SImode, xop0, xop1);
6142 }
6143
6144 /* XXX We don't allow MINUS any more -- see comment in
6145 arm_legitimate_address_outer_p (). */
6146 else if (GET_CODE (x) == MINUS)
6147 {
6148 rtx xop0 = XEXP (x, 0);
6149 rtx xop1 = XEXP (x, 1);
6150
6151 if (CONSTANT_P (xop0))
6152 xop0 = force_reg (SImode, xop0);
6153
6154 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6155 xop1 = force_reg (SImode, xop1);
6156
6157 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6158 x = gen_rtx_MINUS (SImode, xop0, xop1);
6159 }
6160
6161   /* Make sure to take full advantage of the pre-indexed addressing mode
6162      with absolute addresses, which often allows the base register to be
6163      factorized for multiple adjacent memory references, and may even
6164      allow the mini pool to be avoided entirely. */
6165 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6166 {
6167 unsigned int bits;
6168 HOST_WIDE_INT mask, base, index;
6169 rtx base_reg;
6170
6171 	 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
6172 	    use an 8-bit index.  So let's use a 12-bit index for SImode only and
6173 	    hope that arm_gen_constant will enable ldrb to use more bits. */
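      /* For example, with MODE == SImode a constant address of 0x1234567 is
	 split into a base of 0x1234000 (forced into a register) plus an index
	 of 0x567, which fits the 12-bit offset field of a word load.  */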
6174 bits = (mode == SImode) ? 12 : 8;
6175 mask = (1 << bits) - 1;
6176 base = INTVAL (x) & ~mask;
6177 index = INTVAL (x) & mask;
6178 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6179 {
6180 /* It'll most probably be more efficient to generate the base
6181 with more bits set and use a negative index instead. */
6182 base |= mask;
6183 index -= mask;
6184 }
6185 base_reg = force_reg (SImode, GEN_INT (base));
6186 x = plus_constant (base_reg, index);
6187 }
6188
6189 if (flag_pic)
6190 {
6191 /* We need to find and carefully transform any SYMBOL and LABEL
6192 references; so go back to the original address expression. */
6193 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6194
6195 if (new_x != orig_x)
6196 x = new_x;
6197 }
6198
6199 return x;
6200 }
6201
6202
6203 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6204 to be legitimate. If we find one, return the new, valid address. */
6205 rtx
6206 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6207 {
6208 if (arm_tls_symbol_p (x))
6209 return legitimize_tls_address (x, NULL_RTX);
6210
6211 if (GET_CODE (x) == PLUS
6212 && GET_CODE (XEXP (x, 1)) == CONST_INT
6213 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6214 || INTVAL (XEXP (x, 1)) < 0))
6215 {
6216 rtx xop0 = XEXP (x, 0);
6217 rtx xop1 = XEXP (x, 1);
6218 HOST_WIDE_INT offset = INTVAL (xop1);
6219
6220 /* Try and fold the offset into a biasing of the base register and
6221 then offsetting that. Don't do this when optimizing for space
6222 since it can cause too many CSEs. */
6223 if (optimize_size && offset >= 0
6224 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6225 {
6226 HOST_WIDE_INT delta;
6227
6228 if (offset >= 256)
6229 delta = offset - (256 - GET_MODE_SIZE (mode));
6230 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6231 delta = 31 * GET_MODE_SIZE (mode);
6232 else
6233 delta = offset & (~31 * GET_MODE_SIZE (mode));
6234
6235 xop0 = force_operand (plus_constant (xop0, offset - delta),
6236 NULL_RTX);
6237 x = plus_constant (xop0, delta);
6238 }
6239 else if (offset < 0 && offset > -256)
6240 	/* Small negative offsets are best done with a subtract before the
6241 	   dereference, since forcing these into a register normally takes
6242 	   two instructions. */
6243 x = force_operand (x, NULL_RTX);
6244 else
6245 {
6246 /* For the remaining cases, force the constant into a register. */
6247 xop1 = force_reg (SImode, xop1);
6248 x = gen_rtx_PLUS (SImode, xop0, xop1);
6249 }
6250 }
6251 else if (GET_CODE (x) == PLUS
6252 && s_register_operand (XEXP (x, 1), SImode)
6253 && !s_register_operand (XEXP (x, 0), SImode))
6254 {
6255 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6256
6257 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6258 }
6259
6260 if (flag_pic)
6261 {
6262 /* We need to find and carefully transform any SYMBOL and LABEL
6263 references; so go back to the original address expression. */
6264 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6265
6266 if (new_x != orig_x)
6267 x = new_x;
6268 }
6269
6270 return x;
6271 }
6272
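/* Helper for reload: if *X_P is a Thumb address of the form SP plus an
   out-of-range offset, or the sum of two hi registers, push a reload of the
   whole expression and return the address to use; otherwise return NULL.  */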
6273 rtx
6274 thumb_legitimize_reload_address (rtx *x_p,
6275 enum machine_mode mode,
6276 int opnum, int type,
6277 int ind_levels ATTRIBUTE_UNUSED)
6278 {
6279 rtx x = *x_p;
6280
6281 if (GET_CODE (x) == PLUS
6282 && GET_MODE_SIZE (mode) < 4
6283 && REG_P (XEXP (x, 0))
6284 && XEXP (x, 0) == stack_pointer_rtx
6285 && GET_CODE (XEXP (x, 1)) == CONST_INT
6286 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6287 {
6288 rtx orig_x = x;
6289
6290 x = copy_rtx (x);
6291 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6292 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6293 return x;
6294 }
6295
6296 /* If both registers are hi-regs, then it's better to reload the
6297 entire expression rather than each register individually. That
6298 only requires one reload register rather than two. */
6299 if (GET_CODE (x) == PLUS
6300 && REG_P (XEXP (x, 0))
6301 && REG_P (XEXP (x, 1))
6302 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6303 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6304 {
6305 rtx orig_x = x;
6306
6307 x = copy_rtx (x);
6308 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6309 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6310 return x;
6311 }
6312
6313 return NULL;
6314 }
6315
6316 /* Test for various thread-local symbols. */
6317
6318 /* Return TRUE if X is a thread-local symbol. */
6319
6320 static bool
6321 arm_tls_symbol_p (rtx x)
6322 {
6323 if (! TARGET_HAVE_TLS)
6324 return false;
6325
6326 if (GET_CODE (x) != SYMBOL_REF)
6327 return false;
6328
6329 return SYMBOL_REF_TLS_MODEL (x) != 0;
6330 }
6331
6332 /* Helper for arm_tls_referenced_p. */
6333
6334 static int
6335 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6336 {
6337 if (GET_CODE (*x) == SYMBOL_REF)
6338 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6339
6340 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6341 TLS offsets, not real symbol references. */
6342 if (GET_CODE (*x) == UNSPEC
6343 && XINT (*x, 1) == UNSPEC_TLS)
6344 return -1;
6345
6346 return 0;
6347 }
6348
6349 /* Return TRUE if X contains any TLS symbol references. */
6350
6351 bool
6352 arm_tls_referenced_p (rtx x)
6353 {
6354 if (! TARGET_HAVE_TLS)
6355 return false;
6356
6357 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6358 }
6359
6360 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6361
6362 bool
6363 arm_cannot_force_const_mem (rtx x)
6364 {
6365 rtx base, offset;
6366
6367 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6368 {
6369 split_const (x, &base, &offset);
6370 if (GET_CODE (base) == SYMBOL_REF
6371 && !offset_within_block_p (base, INTVAL (offset)))
6372 return true;
6373 }
6374 return arm_tls_referenced_p (x);
6375 }
6376 \f
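/* Helper macros for the rtx cost routines below: test whether X is a REG or
   a SUBREG of a REG, and strip any SUBREG to get at the underlying REG.  */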
6377 #define REG_OR_SUBREG_REG(X) \
6378 (GET_CODE (X) == REG \
6379 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6380
6381 #define REG_OR_SUBREG_RTX(X) \
6382 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6383
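/* Return an approximate cost for rtx X (with code CODE) when compiling for
   Thumb-1; OUTER is the rtx code of the enclosing expression.  */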
6384 static inline int
6385 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6386 {
6387 enum machine_mode mode = GET_MODE (x);
6388 int total;
6389
6390 switch (code)
6391 {
6392 case ASHIFT:
6393 case ASHIFTRT:
6394 case LSHIFTRT:
6395 case ROTATERT:
6396 case PLUS:
6397 case MINUS:
6398 case COMPARE:
6399 case NEG:
6400 case NOT:
6401 return COSTS_N_INSNS (1);
6402
6403 case MULT:
6404 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6405 {
6406 int cycles = 0;
6407 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6408
6409 while (i)
6410 {
6411 i >>= 2;
6412 cycles++;
6413 }
6414 return COSTS_N_INSNS (2) + cycles;
6415 }
6416 return COSTS_N_INSNS (1) + 16;
6417
6418 case SET:
6419 return (COSTS_N_INSNS (1)
6420 	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6421 		     + (GET_CODE (SET_DEST (x)) == MEM)));
6422
6423 case CONST_INT:
6424 if (outer == SET)
6425 {
6426 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6427 return 0;
6428 if (thumb_shiftable_const (INTVAL (x)))
6429 return COSTS_N_INSNS (2);
6430 return COSTS_N_INSNS (3);
6431 }
6432 else if ((outer == PLUS || outer == COMPARE)
6433 && INTVAL (x) < 256 && INTVAL (x) > -256)
6434 return 0;
6435 else if ((outer == IOR || outer == XOR || outer == AND)
6436 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6437 return COSTS_N_INSNS (1);
6438 else if (outer == AND)
6439 {
6440 int i;
6441 /* This duplicates the tests in the andsi3 expander. */
6442 for (i = 9; i <= 31; i++)
6443 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6444 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6445 return COSTS_N_INSNS (2);
6446 }
6447 else if (outer == ASHIFT || outer == ASHIFTRT
6448 || outer == LSHIFTRT)
6449 return 0;
6450 return COSTS_N_INSNS (2);
6451
6452 case CONST:
6453 case CONST_DOUBLE:
6454 case LABEL_REF:
6455 case SYMBOL_REF:
6456 return COSTS_N_INSNS (3);
6457
6458 case UDIV:
6459 case UMOD:
6460 case DIV:
6461 case MOD:
6462 return 100;
6463
6464 case TRUNCATE:
6465 return 99;
6466
6467 case AND:
6468 case XOR:
6469 case IOR:
6470 /* XXX guess. */
6471 return 8;
6472
6473 case MEM:
6474 /* XXX another guess. */
6475 /* Memory costs quite a lot for the first word, but subsequent words
6476 load at the equivalent of a single insn each. */
6477 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6478 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6479 ? 4 : 0));
6480
6481 case IF_THEN_ELSE:
6482 /* XXX a guess. */
6483 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6484 return 14;
6485 return 2;
6486
6487 case SIGN_EXTEND:
6488 case ZERO_EXTEND:
6489 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6490 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6491
6492 if (mode == SImode)
6493 return total;
6494
6495 if (arm_arch6)
6496 return total + COSTS_N_INSNS (1);
6497
6498 /* Assume a two-shift sequence. Increase the cost slightly so
6499 we prefer actual shifts over an extend operation. */
6500 return total + 1 + COSTS_N_INSNS (2);
6501
6502 default:
6503 return 99;
6504 }
6505 }
6506
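/* Cost computation shared by the ARM and Thumb-2 speed cost routines.  Set
   *TOTAL to the cost of X, whose enclosing expression has code OUTER, and
   return true if the operands of X have already been costed (so the caller
   should not recurse into them).  */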
6507 static inline bool
6508 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6509 {
6510 enum machine_mode mode = GET_MODE (x);
6511 enum rtx_code subcode;
6512 rtx operand;
6513 enum rtx_code code = GET_CODE (x);
6514 *total = 0;
6515
6516 switch (code)
6517 {
6518 case MEM:
6519 /* Memory costs quite a lot for the first word, but subsequent words
6520 load at the equivalent of a single insn each. */
6521 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6522 return true;
6523
6524 case DIV:
6525 case MOD:
6526 case UDIV:
6527 case UMOD:
6528 if (TARGET_HARD_FLOAT && mode == SFmode)
6529 *total = COSTS_N_INSNS (2);
6530 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6531 *total = COSTS_N_INSNS (4);
6532 else
6533 *total = COSTS_N_INSNS (20);
6534 return false;
6535
6536 case ROTATE:
6537 if (GET_CODE (XEXP (x, 1)) == REG)
6538 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6539 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6540 *total = rtx_cost (XEXP (x, 1), code, speed);
6541
6542 /* Fall through */
6543 case ROTATERT:
6544 if (mode != SImode)
6545 {
6546 *total += COSTS_N_INSNS (4);
6547 return true;
6548 }
6549
6550 /* Fall through */
6551 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6552 *total += rtx_cost (XEXP (x, 0), code, speed);
6553 if (mode == DImode)
6554 {
6555 *total += COSTS_N_INSNS (3);
6556 return true;
6557 }
6558
6559 *total += COSTS_N_INSNS (1);
6560 /* Increase the cost of complex shifts because they aren't any faster,
6561 and reduce dual issue opportunities. */
6562 if (arm_tune_cortex_a9
6563 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6564 ++*total;
6565
6566 return true;
6567
6568 case MINUS:
6569 if (mode == DImode)
6570 {
6571 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6572 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6573 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6574 {
6575 *total += rtx_cost (XEXP (x, 1), code, speed);
6576 return true;
6577 }
6578
6579 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6580 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6581 {
6582 *total += rtx_cost (XEXP (x, 0), code, speed);
6583 return true;
6584 }
6585
6586 return false;
6587 }
6588
6589 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6590 {
6591 if (TARGET_HARD_FLOAT
6592 && (mode == SFmode
6593 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6594 {
6595 *total = COSTS_N_INSNS (1);
6596 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6597 && arm_const_double_rtx (XEXP (x, 0)))
6598 {
6599 *total += rtx_cost (XEXP (x, 1), code, speed);
6600 return true;
6601 }
6602
6603 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6604 && arm_const_double_rtx (XEXP (x, 1)))
6605 {
6606 *total += rtx_cost (XEXP (x, 0), code, speed);
6607 return true;
6608 }
6609
6610 return false;
6611 }
6612 *total = COSTS_N_INSNS (20);
6613 return false;
6614 }
6615
6616 *total = COSTS_N_INSNS (1);
6617 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6618 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6619 {
6620 *total += rtx_cost (XEXP (x, 1), code, speed);
6621 return true;
6622 }
6623
6624 subcode = GET_CODE (XEXP (x, 1));
6625 if (subcode == ASHIFT || subcode == ASHIFTRT
6626 || subcode == LSHIFTRT
6627 || subcode == ROTATE || subcode == ROTATERT)
6628 {
6629 *total += rtx_cost (XEXP (x, 0), code, speed);
6630 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6631 return true;
6632 }
6633
6634 /* A shift as a part of RSB costs no more than RSB itself. */
6635 if (GET_CODE (XEXP (x, 0)) == MULT
6636 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6637 {
6638 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6639 *total += rtx_cost (XEXP (x, 1), code, speed);
6640 return true;
6641 }
6642
6643 if (subcode == MULT
6644 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6645 {
6646 *total += rtx_cost (XEXP (x, 0), code, speed);
6647 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6648 return true;
6649 }
6650
6651 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6652 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6653 {
6654 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6655 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6656 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6657 *total += COSTS_N_INSNS (1);
6658
6659 return true;
6660 }
6661
6662 /* Fall through */
6663
6664 case PLUS:
6665 if (code == PLUS && arm_arch6 && mode == SImode
6666 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6667 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6668 {
6669 *total = COSTS_N_INSNS (1);
6670 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6671 speed);
6672 *total += rtx_cost (XEXP (x, 1), code, speed);
6673 return true;
6674 }
6675
6676 /* MLA: All arguments must be registers. We filter out
6677 multiplication by a power of two, so that we fall down into
6678 the code below. */
6679 if (GET_CODE (XEXP (x, 0)) == MULT
6680 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6681 {
6682 /* The cost comes from the cost of the multiply. */
6683 return false;
6684 }
6685
6686 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6687 {
6688 if (TARGET_HARD_FLOAT
6689 && (mode == SFmode
6690 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6691 {
6692 *total = COSTS_N_INSNS (1);
6693 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6694 && arm_const_double_rtx (XEXP (x, 1)))
6695 {
6696 *total += rtx_cost (XEXP (x, 0), code, speed);
6697 return true;
6698 }
6699
6700 return false;
6701 }
6702
6703 *total = COSTS_N_INSNS (20);
6704 return false;
6705 }
6706
6707 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6708 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6709 {
6710 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6711 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6712 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6713 *total += COSTS_N_INSNS (1);
6714 return true;
6715 }
6716
6717 /* Fall through */
6718
6719 case AND: case XOR: case IOR:
6720
6721       /* Normally the frame registers will be split into reg+const during
6722 reload, so it is a bad idea to combine them with other instructions,
6723 since then they might not be moved outside of loops. As a compromise
6724 we allow integration with ops that have a constant as their second
6725 operand. */
6726 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6727 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6728 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6729 *total = COSTS_N_INSNS (1);
6730
6731 if (mode == DImode)
6732 {
6733 *total += COSTS_N_INSNS (2);
6734 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6735 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6736 {
6737 *total += rtx_cost (XEXP (x, 0), code, speed);
6738 return true;
6739 }
6740
6741 return false;
6742 }
6743
6744 *total += COSTS_N_INSNS (1);
6745 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6746 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6747 {
6748 *total += rtx_cost (XEXP (x, 0), code, speed);
6749 return true;
6750 }
6751 subcode = GET_CODE (XEXP (x, 0));
6752 if (subcode == ASHIFT || subcode == ASHIFTRT
6753 || subcode == LSHIFTRT
6754 || subcode == ROTATE || subcode == ROTATERT)
6755 {
6756 *total += rtx_cost (XEXP (x, 1), code, speed);
6757 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6758 return true;
6759 }
6760
6761 if (subcode == MULT
6762 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6763 {
6764 *total += rtx_cost (XEXP (x, 1), code, speed);
6765 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6766 return true;
6767 }
6768
6769 if (subcode == UMIN || subcode == UMAX
6770 || subcode == SMIN || subcode == SMAX)
6771 {
6772 *total = COSTS_N_INSNS (3);
6773 return true;
6774 }
6775
6776 return false;
6777
6778 case MULT:
6779 /* This should have been handled by the CPU specific routines. */
6780 gcc_unreachable ();
6781
6782 case TRUNCATE:
6783 if (arm_arch3m && mode == SImode
6784 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6785 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6786 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6787 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6788 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6789 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6790 {
6791 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6792 return true;
6793 }
6794 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6795 return false;
6796
6797 case NEG:
6798 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6799 {
6800 if (TARGET_HARD_FLOAT
6801 && (mode == SFmode
6802 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6803 {
6804 *total = COSTS_N_INSNS (1);
6805 return false;
6806 }
6807 *total = COSTS_N_INSNS (2);
6808 return false;
6809 }
6810
6811 /* Fall through */
6812 case NOT:
6813 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6814 if (mode == SImode && code == NOT)
6815 {
6816 subcode = GET_CODE (XEXP (x, 0));
6817 if (subcode == ASHIFT || subcode == ASHIFTRT
6818 || subcode == LSHIFTRT
6819 || subcode == ROTATE || subcode == ROTATERT
6820 || (subcode == MULT
6821 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6822 {
6823 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6824 /* Register shifts cost an extra cycle. */
6825 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6826 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6827 subcode, speed);
6828 return true;
6829 }
6830 }
6831
6832 return false;
6833
6834 case IF_THEN_ELSE:
6835 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6836 {
6837 *total = COSTS_N_INSNS (4);
6838 return true;
6839 }
6840
6841 operand = XEXP (x, 0);
6842
6843 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6844 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6845 && GET_CODE (XEXP (operand, 0)) == REG
6846 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6847 *total += COSTS_N_INSNS (1);
6848 *total += (rtx_cost (XEXP (x, 1), code, speed)
6849 + rtx_cost (XEXP (x, 2), code, speed));
6850 return true;
6851
6852 case NE:
6853 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6854 {
6855 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6856 return true;
6857 }
6858 goto scc_insn;
6859
6860 case GE:
6861 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6862 && mode == SImode && XEXP (x, 1) == const0_rtx)
6863 {
6864 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6865 return true;
6866 }
6867 goto scc_insn;
6868
6869 case LT:
6870 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6871 && mode == SImode && XEXP (x, 1) == const0_rtx)
6872 {
6873 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6874 return true;
6875 }
6876 goto scc_insn;
6877
6878 case EQ:
6879 case GT:
6880 case LE:
6881 case GEU:
6882 case LTU:
6883 case GTU:
6884 case LEU:
6885 case UNORDERED:
6886 case ORDERED:
6887 case UNEQ:
6888 case UNGE:
6889 case UNLT:
6890 case UNGT:
6891 case UNLE:
6892 scc_insn:
6893 /* SCC insns. In the case where the comparison has already been
6894 performed, then they cost 2 instructions. Otherwise they need
6895 an additional comparison before them. */
6896 *total = COSTS_N_INSNS (2);
6897 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6898 {
6899 return true;
6900 }
6901
6902 /* Fall through */
6903 case COMPARE:
6904 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6905 {
6906 *total = 0;
6907 return true;
6908 }
6909
6910 *total += COSTS_N_INSNS (1);
6911 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6912 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6913 {
6914 *total += rtx_cost (XEXP (x, 0), code, speed);
6915 return true;
6916 }
6917
6918 subcode = GET_CODE (XEXP (x, 0));
6919 if (subcode == ASHIFT || subcode == ASHIFTRT
6920 || subcode == LSHIFTRT
6921 || subcode == ROTATE || subcode == ROTATERT)
6922 {
6923 *total += rtx_cost (XEXP (x, 1), code, speed);
6924 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6925 return true;
6926 }
6927
6928 if (subcode == MULT
6929 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6930 {
6931 *total += rtx_cost (XEXP (x, 1), code, speed);
6932 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6933 return true;
6934 }
6935
6936 return false;
6937
6938 case UMIN:
6939 case UMAX:
6940 case SMIN:
6941 case SMAX:
6942 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6943 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6944 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6945 *total += rtx_cost (XEXP (x, 1), code, speed);
6946 return true;
6947
6948 case ABS:
6949 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6950 {
6951 if (TARGET_HARD_FLOAT
6952 && (mode == SFmode
6953 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6954 {
6955 *total = COSTS_N_INSNS (1);
6956 return false;
6957 }
6958 *total = COSTS_N_INSNS (20);
6959 return false;
6960 }
6961 *total = COSTS_N_INSNS (1);
6962 if (mode == DImode)
6963 *total += COSTS_N_INSNS (3);
6964 return false;
6965
6966 case SIGN_EXTEND:
6967 case ZERO_EXTEND:
6968 *total = 0;
6969 if (GET_MODE_CLASS (mode) == MODE_INT)
6970 {
6971 rtx op = XEXP (x, 0);
6972 enum machine_mode opmode = GET_MODE (op);
6973
6974 if (mode == DImode)
6975 *total += COSTS_N_INSNS (1);
6976
6977 if (opmode != SImode)
6978 {
6979 if (MEM_P (op))
6980 {
6981 /* If !arm_arch4, we use one of the extendhisi2_mem
6982 or movhi_bytes patterns for HImode. For a QImode
6983 sign extension, we first zero-extend from memory
6984 and then perform a shift sequence. */
6985 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6986 *total += COSTS_N_INSNS (2);
6987 }
6988 else if (arm_arch6)
6989 *total += COSTS_N_INSNS (1);
6990
6991 /* We don't have the necessary insn, so we need to perform some
6992 other operation. */
6993 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6994 /* An and with constant 255. */
6995 *total += COSTS_N_INSNS (1);
6996 else
6997 /* A shift sequence. Increase costs slightly to avoid
6998 combining two shifts into an extend operation. */
6999 *total += COSTS_N_INSNS (2) + 1;
7000 }
7001
7002 return false;
7003 }
7004
7005 switch (GET_MODE (XEXP (x, 0)))
7006 {
7007 case V8QImode:
7008 case V4HImode:
7009 case V2SImode:
7010 case V4QImode:
7011 case V2HImode:
7012 *total = COSTS_N_INSNS (1);
7013 return false;
7014
7015 default:
7016 gcc_unreachable ();
7017 }
7018 gcc_unreachable ();
7019
7020 case ZERO_EXTRACT:
7021 case SIGN_EXTRACT:
7022 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7023 return true;
7024
7025 case CONST_INT:
7026 if (const_ok_for_arm (INTVAL (x))
7027 || const_ok_for_arm (~INTVAL (x)))
7028 *total = COSTS_N_INSNS (1);
7029 else
7030 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7031 INTVAL (x), NULL_RTX,
7032 NULL_RTX, 0, 0));
7033 return true;
7034
7035 case CONST:
7036 case LABEL_REF:
7037 case SYMBOL_REF:
7038 *total = COSTS_N_INSNS (3);
7039 return true;
7040
7041 case HIGH:
7042 *total = COSTS_N_INSNS (1);
7043 return true;
7044
7045 case LO_SUM:
7046 *total = COSTS_N_INSNS (1);
7047 *total += rtx_cost (XEXP (x, 0), code, speed);
7048 return true;
7049
7050 case CONST_DOUBLE:
7051 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7052 && (mode == SFmode || !TARGET_VFP_SINGLE))
7053 *total = COSTS_N_INSNS (1);
7054 else
7055 *total = COSTS_N_INSNS (4);
7056 return true;
7057
7058 default:
7059 *total = COSTS_N_INSNS (4);
7060 return false;
7061 }
7062 }
7063
7064 /* Estimate the size cost of Thumb-1 instructions.
7065    For now most of the code is copied from thumb1_rtx_costs.  We need more
7066    fine-grained tuning when we have more related test cases. */
7067 static inline int
7068 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7069 {
7070 enum machine_mode mode = GET_MODE (x);
7071
7072 switch (code)
7073 {
7074 case ASHIFT:
7075 case ASHIFTRT:
7076 case LSHIFTRT:
7077 case ROTATERT:
7078 case PLUS:
7079 case MINUS:
7080 case COMPARE:
7081 case NEG:
7082 case NOT:
7083 return COSTS_N_INSNS (1);
7084
7085 case MULT:
7086 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7087 {
7088           /* The Thumb-1 mul instruction can't operate on a constant; we must
7089              load it into a register first. */
7090 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7091 return COSTS_N_INSNS (1) + const_size;
7092 }
7093 return COSTS_N_INSNS (1);
7094
7095 case SET:
7096 return (COSTS_N_INSNS (1)
7097 	    + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7098 		   + (GET_CODE (SET_DEST (x)) == MEM)));
7099
7100 case CONST_INT:
7101 if (outer == SET)
7102 {
7103 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7104 return COSTS_N_INSNS (1);
7105 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7106 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7107 return COSTS_N_INSNS (2);
7108 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7109 if (thumb_shiftable_const (INTVAL (x)))
7110 return COSTS_N_INSNS (2);
7111 return COSTS_N_INSNS (3);
7112 }
7113 else if ((outer == PLUS || outer == COMPARE)
7114 && INTVAL (x) < 256 && INTVAL (x) > -256)
7115 return 0;
7116 else if ((outer == IOR || outer == XOR || outer == AND)
7117 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7118 return COSTS_N_INSNS (1);
7119 else if (outer == AND)
7120 {
7121 int i;
7122 /* This duplicates the tests in the andsi3 expander. */
7123 for (i = 9; i <= 31; i++)
7124 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7125 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7126 return COSTS_N_INSNS (2);
7127 }
7128 else if (outer == ASHIFT || outer == ASHIFTRT
7129 || outer == LSHIFTRT)
7130 return 0;
7131 return COSTS_N_INSNS (2);
7132
7133 case CONST:
7134 case CONST_DOUBLE:
7135 case LABEL_REF:
7136 case SYMBOL_REF:
7137 return COSTS_N_INSNS (3);
7138
7139 case UDIV:
7140 case UMOD:
7141 case DIV:
7142 case MOD:
7143 return 100;
7144
7145 case TRUNCATE:
7146 return 99;
7147
7148 case AND:
7149 case XOR:
7150 case IOR:
7151 /* XXX guess. */
7152 return 8;
7153
7154 case MEM:
7155 /* XXX another guess. */
7156 /* Memory costs quite a lot for the first word, but subsequent words
7157 load at the equivalent of a single insn each. */
7158 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7159 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7160 ? 4 : 0));
7161
7162 case IF_THEN_ELSE:
7163 /* XXX a guess. */
7164 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7165 return 14;
7166 return 2;
7167
7168 case ZERO_EXTEND:
7169 /* XXX still guessing. */
7170 switch (GET_MODE (XEXP (x, 0)))
7171 {
7172 case QImode:
7173 return (1 + (mode == DImode ? 4 : 0)
7174 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7175
7176 case HImode:
7177 return (4 + (mode == DImode ? 4 : 0)
7178 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7179
7180 case SImode:
7181 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7182
7183 default:
7184 return 99;
7185 }
7186
7187 default:
7188 return 99;
7189 }
7190 }
7191
7192 /* RTX costs when optimizing for size. */
7193 static bool
7194 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7195 int *total)
7196 {
7197 enum machine_mode mode = GET_MODE (x);
7198 if (TARGET_THUMB1)
7199 {
7200 *total = thumb1_size_rtx_costs (x, code, outer_code);
7201 return true;
7202 }
7203
7204 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7205 switch (code)
7206 {
7207 case MEM:
7208       /* A memory access costs 1 insn if the mode is small or the address is
7209          a single register; otherwise it costs one insn per word. */
7210 if (REG_P (XEXP (x, 0)))
7211 *total = COSTS_N_INSNS (1);
7212 else if (flag_pic
7213 && GET_CODE (XEXP (x, 0)) == PLUS
7214 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7215 /* This will be split into two instructions.
7216 See arm.md:calculate_pic_address. */
7217 *total = COSTS_N_INSNS (2);
7218 else
7219 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7220 return true;
7221
7222 case DIV:
7223 case MOD:
7224 case UDIV:
7225 case UMOD:
7226 /* Needs a libcall, so it costs about this. */
7227 *total = COSTS_N_INSNS (2);
7228 return false;
7229
7230 case ROTATE:
7231 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7232 {
7233 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7234 return true;
7235 }
7236 /* Fall through */
7237 case ROTATERT:
7238 case ASHIFT:
7239 case LSHIFTRT:
7240 case ASHIFTRT:
7241 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7242 {
7243 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7244 return true;
7245 }
7246 else if (mode == SImode)
7247 {
7248 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7249 /* Slightly disparage register shifts, but not by much. */
7250 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7251 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7252 return true;
7253 }
7254
7255 /* Needs a libcall. */
7256 *total = COSTS_N_INSNS (2);
7257 return false;
7258
7259 case MINUS:
7260 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7261 && (mode == SFmode || !TARGET_VFP_SINGLE))
7262 {
7263 *total = COSTS_N_INSNS (1);
7264 return false;
7265 }
7266
7267 if (mode == SImode)
7268 {
7269 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7270 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7271
7272 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7273 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7274 || subcode1 == ROTATE || subcode1 == ROTATERT
7275 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7276 || subcode1 == ASHIFTRT)
7277 {
7278 /* It's just the cost of the two operands. */
7279 *total = 0;
7280 return false;
7281 }
7282
7283 *total = COSTS_N_INSNS (1);
7284 return false;
7285 }
7286
7287 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7288 return false;
7289
7290 case PLUS:
7291 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7292 && (mode == SFmode || !TARGET_VFP_SINGLE))
7293 {
7294 *total = COSTS_N_INSNS (1);
7295 return false;
7296 }
7297
7298 /* A shift as a part of ADD costs nothing. */
7299 if (GET_CODE (XEXP (x, 0)) == MULT
7300 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7301 {
7302 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7303 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7304 *total += rtx_cost (XEXP (x, 1), code, false);
7305 return true;
7306 }
7307
7308 /* Fall through */
7309 case AND: case XOR: case IOR:
7310 if (mode == SImode)
7311 {
7312 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7313
7314 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7315 || subcode == LSHIFTRT || subcode == ASHIFTRT
7316 || (code == AND && subcode == NOT))
7317 {
7318 /* It's just the cost of the two operands. */
7319 *total = 0;
7320 return false;
7321 }
7322 }
7323
7324 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7325 return false;
7326
7327 case MULT:
7328 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7329 return false;
7330
7331 case NEG:
7332 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7333 && (mode == SFmode || !TARGET_VFP_SINGLE))
7334 {
7335 *total = COSTS_N_INSNS (1);
7336 return false;
7337 }
7338
7339 /* Fall through */
7340 case NOT:
7341 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7342
7343 return false;
7344
7345 case IF_THEN_ELSE:
7346 *total = 0;
7347 return false;
7348
7349 case COMPARE:
7350 if (cc_register (XEXP (x, 0), VOIDmode))
7351 *total = 0;
7352 else
7353 *total = COSTS_N_INSNS (1);
7354 return false;
7355
7356 case ABS:
7357 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7358 && (mode == SFmode || !TARGET_VFP_SINGLE))
7359 *total = COSTS_N_INSNS (1);
7360 else
7361 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7362 return false;
7363
7364 case SIGN_EXTEND:
7365 case ZERO_EXTEND:
7366 return arm_rtx_costs_1 (x, outer_code, total, 0);
7367
7368 case CONST_INT:
7369 if (const_ok_for_arm (INTVAL (x)))
7370 /* A multiplication by a constant requires another instruction
7371 to load the constant to a register. */
7372 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7373 ? 1 : 0);
7374 else if (const_ok_for_arm (~INTVAL (x)))
7375 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7376 else if (const_ok_for_arm (-INTVAL (x)))
7377 {
7378 if (outer_code == COMPARE || outer_code == PLUS
7379 || outer_code == MINUS)
7380 *total = 0;
7381 else
7382 *total = COSTS_N_INSNS (1);
7383 }
7384 else
7385 *total = COSTS_N_INSNS (2);
7386 return true;
7387
7388 case CONST:
7389 case LABEL_REF:
7390 case SYMBOL_REF:
7391 *total = COSTS_N_INSNS (2);
7392 return true;
7393
7394 case CONST_DOUBLE:
7395 *total = COSTS_N_INSNS (4);
7396 return true;
7397
7398 case HIGH:
7399 case LO_SUM:
7400 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7401 cost of these slightly. */
7402 *total = COSTS_N_INSNS (1) + 1;
7403 return true;
7404
7405 default:
7406 if (mode != VOIDmode)
7407 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7408 else
7409 *total = COSTS_N_INSNS (4); /* Who knows? */
7410 return false;
7411 }
7412 }
7413
7414 /* RTX costs hook: dispatch to the size costs or the per-core speed costs. */
7415 static bool
7416 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7417 bool speed)
7418 {
7419 if (!speed)
7420 return arm_size_rtx_costs (x, (enum rtx_code) code,
7421 (enum rtx_code) outer_code, total);
7422 else
7423 return current_tune->rtx_costs (x, (enum rtx_code) code,
7424 (enum rtx_code) outer_code,
7425 total, speed);
7426 }
7427
7428 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7429 supported on any "slowmul" cores, so it can be ignored. */
7430
7431 static bool
7432 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7433 int *total, bool speed)
7434 {
7435 enum machine_mode mode = GET_MODE (x);
7436
7437 if (TARGET_THUMB)
7438 {
7439 *total = thumb1_rtx_costs (x, code, outer_code);
7440 return true;
7441 }
7442
7443 switch (code)
7444 {
7445 case MULT:
7446 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7447 || mode == DImode)
7448 {
7449 *total = COSTS_N_INSNS (20);
7450 return false;
7451 }
7452
7453 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7454 {
7455 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7456 & (unsigned HOST_WIDE_INT) 0xffffffff);
7457 int cost, const_ok = const_ok_for_arm (i);
7458 int j, booth_unit_size;
7459
7460 /* Tune as appropriate. */
7461 cost = const_ok ? 4 : 8;
7462 booth_unit_size = 2;
7463 for (j = 0; i && j < 32; j += booth_unit_size)
7464 {
7465 i >>= booth_unit_size;
7466 cost++;
7467 }
7468
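/* Worked example (illustrative): for a multiply by the constant 5,
const_ok_for_arm holds so COST starts at 4; the loop above then iterates
twice (5 >> 2 == 1, then 1 >> 2 == 0), leaving COST == 6, i.e.
COSTS_N_INSNS (6) plus the cost of operand 0. */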
7469 *total = COSTS_N_INSNS (cost);
7470 *total += rtx_cost (XEXP (x, 0), code, speed);
7471 return true;
7472 }
7473
7474 *total = COSTS_N_INSNS (20);
7475 return false;
7476
7477 default:
7478 return arm_rtx_costs_1 (x, outer_code, total, speed);
7479 }
7480 }
7481
7482
7483 /* RTX cost for cores with a fast multiply unit (M variants). */
7484
7485 static bool
7486 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7487 int *total, bool speed)
7488 {
7489 enum machine_mode mode = GET_MODE (x);
7490
7491 if (TARGET_THUMB1)
7492 {
7493 *total = thumb1_rtx_costs (x, code, outer_code);
7494 return true;
7495 }
7496
7497 /* ??? Should Thumb-2 use different costs? */
7498 switch (code)
7499 {
7500 case MULT:
7501 /* There is no point basing this on the tuning, since it is always the
7502 fast variant if it exists at all. */
7503 if (mode == DImode
7504 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7505 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7506 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7507 {
7508 *total = COSTS_N_INSNS (2);
7509 return false;
7510 }
7511
7512
7513 if (mode == DImode)
7514 {
7515 *total = COSTS_N_INSNS (5);
7516 return false;
7517 }
7518
7519 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7520 {
7521 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7522 & (unsigned HOST_WIDE_INT) 0xffffffff);
7523 int cost, const_ok = const_ok_for_arm (i);
7524 int j, booth_unit_size;
7525
7526 /* Tune as appropriate. */
7527 cost = const_ok ? 4 : 8;
7528 booth_unit_size = 8;
7529 for (j = 0; i && j < 32; j += booth_unit_size)
7530 {
7531 i >>= booth_unit_size;
7532 cost++;
7533 }
7534
7535 *total = COSTS_N_INSNS (cost);
7536 return false;
7537 }
7538
7539 if (mode == SImode)
7540 {
7541 *total = COSTS_N_INSNS (4);
7542 return false;
7543 }
7544
7545 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7546 {
7547 if (TARGET_HARD_FLOAT
7548 && (mode == SFmode
7549 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7550 {
7551 *total = COSTS_N_INSNS (1);
7552 return false;
7553 }
7554 }
7555
7556 /* Requires a libcall. */
7557 *total = COSTS_N_INSNS (20);
7558 return false;
7559
7560 default:
7561 return arm_rtx_costs_1 (x, outer_code, total, speed);
7562 }
7563 }
7564
7565
7566 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any XScale cores,
7567 so it can be ignored. */
7568
7569 static bool
7570 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7571 int *total, bool speed)
7572 {
7573 enum machine_mode mode = GET_MODE (x);
7574
7575 if (TARGET_THUMB)
7576 {
7577 *total = thumb1_rtx_costs (x, code, outer_code);
7578 return true;
7579 }
7580
7581 switch (code)
7582 {
7583 case COMPARE:
7584 if (GET_CODE (XEXP (x, 0)) != MULT)
7585 return arm_rtx_costs_1 (x, outer_code, total, speed);
7586
7587 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7588 will stall until the multiplication is complete. */
7589 *total = COSTS_N_INSNS (3);
7590 return false;
7591
7592 case MULT:
7593 /* There is no point basing this on the tuning, since it is always the
7594 fast variant if it exists at all. */
7595 if (mode == DImode
7596 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7597 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7598 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7599 {
7600 *total = COSTS_N_INSNS (2);
7601 return false;
7602 }
7603
7604
7605 if (mode == DImode)
7606 {
7607 *total = COSTS_N_INSNS (5);
7608 return false;
7609 }
7610
7611 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7612 {
7613 /* If operand 1 is a constant we can more accurately
7614 calculate the cost of the multiply. The multiplier can
7615 retire 15 bits on the first cycle and a further 12 on the
7616 second. We do, of course, have to load the constant into
7617 a register first. */
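/* Illustrative example: for a multiply by 0x12345678 the masks below give
i & 0xffff8000 != 0 and i & 0xf8000000 != 0, so COST ends up as 3 and
*total becomes COSTS_N_INSNS (3). Negative constants are inverted first,
so e.g. a multiply by -2 is costed like a multiply by 1. */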
7618 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7619 /* There's a general overhead of one cycle. */
7620 int cost = 1;
7621 unsigned HOST_WIDE_INT masked_const;
7622
7623 if (i & 0x80000000)
7624 i = ~i;
7625
7626 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7627
7628 masked_const = i & 0xffff8000;
7629 if (masked_const != 0)
7630 {
7631 cost++;
7632 masked_const = i & 0xf8000000;
7633 if (masked_const != 0)
7634 cost++;
7635 }
7636 *total = COSTS_N_INSNS (cost);
7637 return false;
7638 }
7639
7640 if (mode == SImode)
7641 {
7642 *total = COSTS_N_INSNS (3);
7643 return false;
7644 }
7645
7646 /* Requires a libcall. */
7647 *total = COSTS_N_INSNS (20);
7648 return false;
7649
7650 default:
7651 return arm_rtx_costs_1 (x, outer_code, total, speed);
7652 }
7653 }
7654
7655
7656 /* RTX costs for 9e (and later) cores. */
7657
7658 static bool
7659 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7660 int *total, bool speed)
7661 {
7662 enum machine_mode mode = GET_MODE (x);
7663
7664 if (TARGET_THUMB1)
7665 {
7666 switch (code)
7667 {
7668 case MULT:
7669 *total = COSTS_N_INSNS (3);
7670 return true;
7671
7672 default:
7673 *total = thumb1_rtx_costs (x, code, outer_code);
7674 return true;
7675 }
7676 }
7677
7678 switch (code)
7679 {
7680 case MULT:
7681 /* There is no point basing this on the tuning, since it is always the
7682 fast variant if it exists at all. */
7683 if (mode == DImode
7684 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7685 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7686 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7687 {
7688 *total = COSTS_N_INSNS (2);
7689 return false;
7690 }
7691
7692
7693 if (mode == DImode)
7694 {
7695 *total = COSTS_N_INSNS (5);
7696 return false;
7697 }
7698
7699 if (mode == SImode)
7700 {
7701 *total = COSTS_N_INSNS (2);
7702 return false;
7703 }
7704
7705 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7706 {
7707 if (TARGET_HARD_FLOAT
7708 && (mode == SFmode
7709 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7710 {
7711 *total = COSTS_N_INSNS (1);
7712 return false;
7713 }
7714 }
7715
7716 *total = COSTS_N_INSNS (20);
7717 return false;
7718
7719 default:
7720 return arm_rtx_costs_1 (x, outer_code, total, speed);
7721 }
7722 }
7723 /* All address computations that can be done are free, but rtx cost returns
7724 the same for practically all of them. So we weight the different types
7725 of address here in the order (most pref first):
7726 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
7727 static inline int
7728 arm_arm_address_cost (rtx x)
7729 {
7730 enum rtx_code c = GET_CODE (x);
7731
7732 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7733 return 0;
7734 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7735 return 10;
7736
7737 if (c == PLUS)
7738 {
7739 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7740 return 2;
7741
7742 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7743 return 3;
7744
7745 return 4;
7746 }
7747
7748 return 6;
7749 }
7750
7751 static inline int
7752 arm_thumb_address_cost (rtx x)
7753 {
7754 enum rtx_code c = GET_CODE (x);
7755
7756 if (c == REG)
7757 return 1;
7758 if (c == PLUS
7759 && GET_CODE (XEXP (x, 0)) == REG
7760 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7761 return 1;
7762
7763 return 2;
7764 }
7765
7766 static int
7767 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7768 {
7769 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7770 }
7771
7772 /* Adjust cost hook for XScale. */
7773 static bool
7774 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7775 {
7776 /* Some true dependencies can have a higher cost depending
7777 on precisely how certain input operands are used. */
7778 if (REG_NOTE_KIND (link) == 0
7779 && recog_memoized (insn) >= 0
7780 && recog_memoized (dep) >= 0)
7781 {
7782 int shift_opnum = get_attr_shift (insn);
7783 enum attr_type attr_type = get_attr_type (dep);
7784
7785 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7786 operand for INSN. If we have a shifted input operand and the
7787 instruction we depend on is another ALU instruction, then we may
7788 have to account for an additional stall. */
7789 if (shift_opnum != 0
7790 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7791 {
7792 rtx shifted_operand;
7793 int opno;
7794
7795 /* Get the shifted operand. */
7796 extract_insn (insn);
7797 shifted_operand = recog_data.operand[shift_opnum];
7798
7799 /* Iterate over all the operands in DEP. If we write an operand
7800 that overlaps with SHIFTED_OPERAND, then we have to increase the
7801 cost of this dependency. */
7802 extract_insn (dep);
7803 preprocess_constraints ();
7804 for (opno = 0; opno < recog_data.n_operands; opno++)
7805 {
7806 /* We can ignore strict inputs. */
7807 if (recog_data.operand_type[opno] == OP_IN)
7808 continue;
7809
7810 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7811 shifted_operand))
7812 {
7813 *cost = 2;
7814 return false;
7815 }
7816 }
7817 }
7818 }
7819 return true;
7820 }
7821
7822 /* Adjust cost hook for Cortex A9. */
7823 static bool
7824 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7825 {
7826 switch (REG_NOTE_KIND (link))
7827 {
7828 case REG_DEP_ANTI:
7829 *cost = 0;
7830 return false;
7831
7832 case REG_DEP_TRUE:
7833 case REG_DEP_OUTPUT:
7834 if (recog_memoized (insn) >= 0
7835 && recog_memoized (dep) >= 0)
7836 {
7837 if (GET_CODE (PATTERN (insn)) == SET)
7838 {
7839 if (GET_MODE_CLASS
7840 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7841 || GET_MODE_CLASS
7842 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7843 {
7844 enum attr_type attr_type_insn = get_attr_type (insn);
7845 enum attr_type attr_type_dep = get_attr_type (dep);
7846
7847 /* By default all dependencies of the form
7848 s0 = s0 <op> s1
7849 s0 = s0 <op> s2
7850 have an extra latency of 1 cycle because
7851 of the input and output dependency in this
7852 case. However, this gets modeled as a true
7853 dependency and hence all these checks. */
7854 if (REG_P (SET_DEST (PATTERN (insn)))
7855 && REG_P (SET_DEST (PATTERN (dep)))
7856 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7857 SET_DEST (PATTERN (dep))))
7858 {
7859 /* FMACS is a special case where the dependent
7860 instruction can be issued 3 cycles before
7861 the normal latency in case of an output
7862 dependency. */
7863 if ((attr_type_insn == TYPE_FMACS
7864 || attr_type_insn == TYPE_FMACD)
7865 && (attr_type_dep == TYPE_FMACS
7866 || attr_type_dep == TYPE_FMACD))
7867 {
7868 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7869 *cost = insn_default_latency (dep) - 3;
7870 else
7871 *cost = insn_default_latency (dep);
7872 return false;
7873 }
7874 else
7875 {
7876 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7877 *cost = insn_default_latency (dep) + 1;
7878 else
7879 *cost = insn_default_latency (dep);
7880 }
7881 return false;
7882 }
7883 }
7884 }
7885 }
7886 break;
7887
7888 default:
7889 gcc_unreachable ();
7890 }
7891
7892 return true;
7893 }
7894
7895 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7896 It corrects the value of COST based on the relationship between
7897 INSN and DEP through the dependence LINK. It returns the new
7898 value. There is a per-core adjust_cost hook to adjust scheduler costs
7899 and the per-core hook can choose to completely override the generic
7900 adjust_cost function. Only put bits of code into arm_adjust_cost that
7901 are common across all cores. */
7902 static int
7903 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7904 {
7905 rtx i_pat, d_pat;
7906
7907 /* When generating Thumb-1 code, we want to place flag-setting operations
7908 close to a conditional branch which depends on them, so that we can
7909 omit the comparison. */
7910 if (TARGET_THUMB1
7911 && REG_NOTE_KIND (link) == 0
7912 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
7913 && recog_memoized (dep) >= 0
7914 && get_attr_conds (dep) == CONDS_SET)
7915 return 0;
7916
7917 if (current_tune->sched_adjust_cost != NULL)
7918 {
7919 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
7920 return cost;
7921 }
7922
7923 /* XXX This is not strictly true for the FPA. */
7924 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7925 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7926 return 0;
7927
7928 /* Call insns don't incur a stall, even if they follow a load. */
7929 if (REG_NOTE_KIND (link) == 0
7930 && GET_CODE (insn) == CALL_INSN)
7931 return 1;
7932
7933 if ((i_pat = single_set (insn)) != NULL
7934 && GET_CODE (SET_SRC (i_pat)) == MEM
7935 && (d_pat = single_set (dep)) != NULL
7936 && GET_CODE (SET_DEST (d_pat)) == MEM)
7937 {
7938 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7939 /* This is a load after a store; there is no conflict if the load reads
7940 from a cached area. Assume that loads from the stack and from the
7941 constant pool are cached, and that others will miss. This is a
7942 hack. */
7943
7944 if ((GET_CODE (src_mem) == SYMBOL_REF
7945 && CONSTANT_POOL_ADDRESS_P (src_mem))
7946 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7947 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7948 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7949 return 1;
7950 }
7951
7952 return cost;
7953 }
7954
7955 static int fp_consts_inited = 0;
7956
7957 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7958 static const char * const strings_fp[8] =
7959 {
7960 "0", "1", "2", "3",
7961 "4", "5", "0.5", "10"
7962 };
7963
7964 static REAL_VALUE_TYPE values_fp[8];
7965
7966 static void
7967 init_fp_table (void)
7968 {
7969 int i;
7970 REAL_VALUE_TYPE r;
7971
7972 if (TARGET_VFP)
7973 fp_consts_inited = 1;
7974 else
7975 fp_consts_inited = 8;
7976
7977 for (i = 0; i < fp_consts_inited; i++)
7978 {
7979 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7980 values_fp[i] = r;
7981 }
7982 }
7983
7984 /* Return TRUE if rtx X is a valid immediate FP constant. */
7985 int
7986 arm_const_double_rtx (rtx x)
7987 {
7988 REAL_VALUE_TYPE r;
7989 int i;
7990
7991 if (!fp_consts_inited)
7992 init_fp_table ();
7993
7994 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7995 if (REAL_VALUE_MINUS_ZERO (r))
7996 return 0;
7997
7998 for (i = 0; i < fp_consts_inited; i++)
7999 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8000 return 1;
8001
8002 return 0;
8003 }
8004
8005 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8006 int
8007 neg_const_double_rtx_ok_for_fpa (rtx x)
8008 {
8009 REAL_VALUE_TYPE r;
8010 int i;
8011
8012 if (!fp_consts_inited)
8013 init_fp_table ();
8014
8015 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8016 r = real_value_negate (&r);
8017 if (REAL_VALUE_MINUS_ZERO (r))
8018 return 0;
8019
8020 for (i = 0; i < 8; i++)
8021 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8022 return 1;
8023
8024 return 0;
8025 }
8026
8027
8028 /* VFPv3 has a fairly wide range of representable immediates, formed from
8029 "quarter-precision" floating-point values. These can be evaluated using this
8030 formula (with ^ for exponentiation):
8031
8032 -1^s * n * 2^-r
8033
8034 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8035 16 <= n <= 31 and 0 <= r <= 7.
8036
8037 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8038
8039 - A (most-significant) is the sign bit.
8040 - BCD are the exponent (encoded as r XOR 3).
8041 - EFGH are the mantissa (encoded as n - 16).
8042 */
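/* A worked example of the encoding above (illustrative): 1.0 is obtained
with s = 0, n = 16, r = 4 (16 * 2^-4), giving the 8-bit value
(0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70; similarly 0.5 uses r = 5
and encodes as 0x60. */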
8043
8044 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8045 fconst[sd] instruction, or -1 if X isn't suitable. */
8046 static int
8047 vfp3_const_double_index (rtx x)
8048 {
8049 REAL_VALUE_TYPE r, m;
8050 int sign, exponent;
8051 unsigned HOST_WIDE_INT mantissa, mant_hi;
8052 unsigned HOST_WIDE_INT mask;
8053 HOST_WIDE_INT m1, m2;
8054 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8055
8056 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8057 return -1;
8058
8059 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8060
8061 /* We can't represent these things, so detect them first. */
8062 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8063 return -1;
8064
8065 /* Extract sign, exponent and mantissa. */
8066 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8067 r = real_value_abs (&r);
8068 exponent = REAL_EXP (&r);
8069 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8070 highest (sign) bit, with a fixed binary point at bit point_pos.
8071 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8072 bits for the mantissa, this may fail (low bits would be lost). */
8073 real_ldexp (&m, &r, point_pos - exponent);
8074 REAL_VALUE_TO_INT (&m1, &m2, m);
8075 mantissa = m1;
8076 mant_hi = m2;
8077
8078 /* If there are bits set in the low part of the mantissa, we can't
8079 represent this value. */
8080 if (mantissa != 0)
8081 return -1;
8082
8083 /* Now make it so that mantissa contains the most-significant bits, and move
8084 the point_pos to indicate that the least-significant bits have been
8085 discarded. */
8086 point_pos -= HOST_BITS_PER_WIDE_INT;
8087 mantissa = mant_hi;
8088
8089 /* We can permit four significant bits of mantissa only, plus a high bit
8090 which is always 1. */
8091 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8092 if ((mantissa & mask) != 0)
8093 return -1;
8094
8095 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8096 mantissa >>= point_pos - 5;
8097
8098 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8099 floating-point immediate zero with Neon using an integer-zero load, but
8100 that case is handled elsewhere.) */
8101 if (mantissa == 0)
8102 return -1;
8103
8104 gcc_assert (mantissa >= 16 && mantissa <= 31);
8105
8106 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8107 normalized significands are in the range [1, 2). (Our mantissa is shifted
8108 left 4 places at this point relative to normalized IEEE754 values). GCC
8109 internally uses [0.5, 1) (see real.c), so the exponent returned from
8110 REAL_EXP must be altered. */
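/* For instance (illustrative): for the value 1.0, REAL_EXP returns 1
(since 1.0 == 0.5 * 2^1) and the shifts above leave MANTISSA == 16, so
EXPONENT becomes 5 - 1 == 4, matching r == 4 in the encoding comment
earlier in this file. */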
8111 exponent = 5 - exponent;
8112
8113 if (exponent < 0 || exponent > 7)
8114 return -1;
8115
8116 /* Sign, mantissa and exponent are now in the correct form to plug into the
8117 formula described in the comment above. */
8118 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8119 }
8120
8121 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8122 int
8123 vfp3_const_double_rtx (rtx x)
8124 {
8125 if (!TARGET_VFP3)
8126 return 0;
8127
8128 return vfp3_const_double_index (x) != -1;
8129 }
8130
8131 /* Recognize immediates which can be used in various Neon instructions. Legal
8132 immediates are described by the following table (for VMVN variants, the
8133 bitwise inverse of the constant shown is recognized. In either case, VMOV
8134 is output and the correct instruction to use for a given constant is chosen
8135 by the assembler). The constant shown is replicated across all elements of
8136 the destination vector.
8137
8138 insn elems variant constant (binary)
8139 ---- ----- ------- -----------------
8140 vmov i32 0 00000000 00000000 00000000 abcdefgh
8141 vmov i32 1 00000000 00000000 abcdefgh 00000000
8142 vmov i32 2 00000000 abcdefgh 00000000 00000000
8143 vmov i32 3 abcdefgh 00000000 00000000 00000000
8144 vmov i16 4 00000000 abcdefgh
8145 vmov i16 5 abcdefgh 00000000
8146 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8147 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8148 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8149 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8150 vmvn i16 10 00000000 abcdefgh
8151 vmvn i16 11 abcdefgh 00000000
8152 vmov i32 12 00000000 00000000 abcdefgh 11111111
8153 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8154 vmov i32 14 00000000 abcdefgh 11111111 11111111
8155 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8156 vmov i8 16 abcdefgh
8157 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8158 eeeeeeee ffffffff gggggggg hhhhhhhh
8159 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8160
8161 For case 18, B = !b. Representable values are exactly those accepted by
8162 vfp3_const_double_index, but are output as floating-point numbers rather
8163 than indices.
8164
8165 Variants 0-5 (inclusive) may also be used as immediates for the second
8166 operand of VORR/VBIC instructions.
8167
8168 The INVERSE argument causes the bitwise inverse of the given operand to be
8169 recognized instead (used for recognizing legal immediates for the VAND/VORN
8170 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8171 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8172 output, rather than the real insns vbic/vorr).
8173
8174 INVERSE makes no difference to the recognition of float vectors.
8175
8176 The return value is the variant of immediate as shown in the above table, or
8177 -1 if the given value doesn't match any of the listed patterns.
8178 */
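/* As an illustration: a V4SImode vector whose elements all equal 0x0000ab00
is recognized as variant 1 above (vmov.i32 with the abcdefgh byte 0xab),
with *ELEMENTWIDTH set to 32. */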
8179 static int
8180 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8181 rtx *modconst, int *elementwidth)
8182 {
8183 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8184 matches = 1; \
8185 for (i = 0; i < idx; i += (STRIDE)) \
8186 if (!(TEST)) \
8187 matches = 0; \
8188 if (matches) \
8189 { \
8190 immtype = (CLASS); \
8191 elsize = (ELSIZE); \
8192 break; \
8193 }
8194
8195 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8196 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8197 unsigned char bytes[16];
8198 int immtype = -1, matches;
8199 unsigned int invmask = inverse ? 0xff : 0;
8200
8201 /* Vectors of float constants. */
8202 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8203 {
8204 rtx el0 = CONST_VECTOR_ELT (op, 0);
8205 REAL_VALUE_TYPE r0;
8206
8207 if (!vfp3_const_double_rtx (el0))
8208 return -1;
8209
8210 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8211
8212 for (i = 1; i < n_elts; i++)
8213 {
8214 rtx elt = CONST_VECTOR_ELT (op, i);
8215 REAL_VALUE_TYPE re;
8216
8217 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8218
8219 if (!REAL_VALUES_EQUAL (r0, re))
8220 return -1;
8221 }
8222
8223 if (modconst)
8224 *modconst = CONST_VECTOR_ELT (op, 0);
8225
8226 if (elementwidth)
8227 *elementwidth = 0;
8228
8229 return 18;
8230 }
8231
8232 /* Splat vector constant out into a byte vector. */
8233 for (i = 0; i < n_elts; i++)
8234 {
8235 rtx el = CONST_VECTOR_ELT (op, i);
8236 unsigned HOST_WIDE_INT elpart;
8237 unsigned int part, parts;
8238
8239 if (GET_CODE (el) == CONST_INT)
8240 {
8241 elpart = INTVAL (el);
8242 parts = 1;
8243 }
8244 else if (GET_CODE (el) == CONST_DOUBLE)
8245 {
8246 elpart = CONST_DOUBLE_LOW (el);
8247 parts = 2;
8248 }
8249 else
8250 gcc_unreachable ();
8251
8252 for (part = 0; part < parts; part++)
8253 {
8254 unsigned int byte;
8255 for (byte = 0; byte < innersize; byte++)
8256 {
8257 bytes[idx++] = (elpart & 0xff) ^ invmask;
8258 elpart >>= BITS_PER_UNIT;
8259 }
8260 if (GET_CODE (el) == CONST_DOUBLE)
8261 elpart = CONST_DOUBLE_HIGH (el);
8262 }
8263 }
8264
8265 /* Sanity check. */
8266 gcc_assert (idx == GET_MODE_SIZE (mode));
8267
8268 do
8269 {
8270 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8271 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8272
8273 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8274 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8275
8276 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8277 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8278
8279 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8280 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8281
8282 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8283
8284 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8285
8286 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8287 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8288
8289 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8290 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8291
8292 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8293 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8294
8295 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8296 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8297
8298 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8299
8300 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8301
8302 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8303 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8304
8305 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8306 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8307
8308 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8309 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8310
8311 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8312 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8313
8314 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8315
8316 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8317 && bytes[i] == bytes[(i + 8) % idx]);
8318 }
8319 while (0);
8320
8321 if (immtype == -1)
8322 return -1;
8323
8324 if (elementwidth)
8325 *elementwidth = elsize;
8326
8327 if (modconst)
8328 {
8329 unsigned HOST_WIDE_INT imm = 0;
8330
8331 /* Un-invert bytes of recognized vector, if necessary. */
8332 if (invmask != 0)
8333 for (i = 0; i < idx; i++)
8334 bytes[i] ^= invmask;
8335
8336 if (immtype == 17)
8337 {
8338 /* FIXME: Broken on 32-bit H_W_I hosts. */
8339 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8340
8341 for (i = 0; i < 8; i++)
8342 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8343 << (i * BITS_PER_UNIT);
8344
8345 *modconst = GEN_INT (imm);
8346 }
8347 else
8348 {
8349 unsigned HOST_WIDE_INT imm = 0;
8350
8351 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8352 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8353
8354 *modconst = GEN_INT (imm);
8355 }
8356 }
8357
8358 return immtype;
8359 #undef CHECK
8360 }
8361
8362 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8363 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8364 float elements), and a modified constant (whatever should be output for a
8365 VMOV) in *MODCONST. */
8366
8367 int
8368 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8369 rtx *modconst, int *elementwidth)
8370 {
8371 rtx tmpconst;
8372 int tmpwidth;
8373 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8374
8375 if (retval == -1)
8376 return 0;
8377
8378 if (modconst)
8379 *modconst = tmpconst;
8380
8381 if (elementwidth)
8382 *elementwidth = tmpwidth;
8383
8384 return 1;
8385 }
8386
8387 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8388 the immediate is valid, write a constant suitable for using as an operand
8389 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8390 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8391
8392 int
8393 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8394 rtx *modconst, int *elementwidth)
8395 {
8396 rtx tmpconst;
8397 int tmpwidth;
8398 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8399
8400 if (retval < 0 || retval > 5)
8401 return 0;
8402
8403 if (modconst)
8404 *modconst = tmpconst;
8405
8406 if (elementwidth)
8407 *elementwidth = tmpwidth;
8408
8409 return 1;
8410 }
8411
8412 /* Return a string suitable for output of Neon immediate logic operation
8413 MNEM. */
8414
8415 char *
8416 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8417 int inverse, int quad)
8418 {
8419 int width, is_valid;
8420 static char templ[40];
8421
8422 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8423
8424 gcc_assert (is_valid != 0);
8425
8426 if (quad)
8427 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8428 else
8429 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8430
8431 return templ;
8432 }
8433
8434 /* Output a sequence of pairwise operations to implement a reduction.
8435 NOTE: We do "too much work" here, because pairwise operations work on two
8436 registers-worth of operands in one go. Unfortunately, we don't think we
8437 can exploit those extra calculations to do the full operation in fewer steps.
8438 Although all vector elements of the result but the first are ignored, we
8439 actually calculate the same result in each of the elements. An alternative
8440 such as initially loading a vector with zero to use as each of the second
8441 operands would use up an additional register and take an extra instruction,
8442 for no particular gain. */
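/* For a V4SFmode reduction, for instance, the loop below emits two pairwise
operations: the first into a fresh scratch register and the second into
OP0, leaving the full reduction duplicated across every element (callers
normally use only element 0). */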
8443
8444 void
8445 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8446 rtx (*reduc) (rtx, rtx, rtx))
8447 {
8448 enum machine_mode inner = GET_MODE_INNER (mode);
8449 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8450 rtx tmpsum = op1;
8451
8452 for (i = parts / 2; i >= 1; i /= 2)
8453 {
8454 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8455 emit_insn (reduc (dest, tmpsum, tmpsum));
8456 tmpsum = dest;
8457 }
8458 }
8459
8460 /* If VALS is a vector constant that can be loaded into a register
8461 using VDUP, generate instructions to do so and return an RTX to
8462 assign to the register. Otherwise return NULL_RTX. */
8463
8464 static rtx
8465 neon_vdup_constant (rtx vals)
8466 {
8467 enum machine_mode mode = GET_MODE (vals);
8468 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8469 int n_elts = GET_MODE_NUNITS (mode);
8470 bool all_same = true;
8471 rtx x;
8472 int i;
8473
8474 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8475 return NULL_RTX;
8476
8477 for (i = 0; i < n_elts; ++i)
8478 {
8479 x = XVECEXP (vals, 0, i);
8480 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8481 all_same = false;
8482 }
8483
8484 if (!all_same)
8485 /* The elements are not all the same. We could handle repeating
8486 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8487 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8488 vdup.i16). */
8489 return NULL_RTX;
8490
8491 /* We can load this constant by using VDUP and a constant in a
8492 single ARM register. This will be cheaper than a vector
8493 load. */
8494
8495 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8496 return gen_rtx_VEC_DUPLICATE (mode, x);
8497 }
8498
8499 /* Generate code to load VALS, which is a PARALLEL containing only
8500 constants (for vec_init) or CONST_VECTOR, efficiently into a
8501 register. Returns an RTX to copy into the register, or NULL_RTX
8502 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8503
8504 rtx
8505 neon_make_constant (rtx vals)
8506 {
8507 enum machine_mode mode = GET_MODE (vals);
8508 rtx target;
8509 rtx const_vec = NULL_RTX;
8510 int n_elts = GET_MODE_NUNITS (mode);
8511 int n_const = 0;
8512 int i;
8513
8514 if (GET_CODE (vals) == CONST_VECTOR)
8515 const_vec = vals;
8516 else if (GET_CODE (vals) == PARALLEL)
8517 {
8518 /* A CONST_VECTOR must contain only CONST_INTs and
8519 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8520 Only store valid constants in a CONST_VECTOR. */
8521 for (i = 0; i < n_elts; ++i)
8522 {
8523 rtx x = XVECEXP (vals, 0, i);
8524 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8525 n_const++;
8526 }
8527 if (n_const == n_elts)
8528 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8529 }
8530 else
8531 gcc_unreachable ();
8532
8533 if (const_vec != NULL
8534 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8535 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8536 return const_vec;
8537 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8538 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8539 pipeline cycle; creating the constant takes one or two ARM
8540 pipeline cycles. */
8541 return target;
8542 else if (const_vec != NULL_RTX)
8543 /* Load from constant pool. On Cortex-A8 this takes two cycles
8544 (for either double or quad vectors). We can not take advantage
8545 of single-cycle VLD1 because we need a PC-relative addressing
8546 mode. */
8547 return const_vec;
8548 else
8549 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8550 We can not construct an initializer. */
8551 return NULL_RTX;
8552 }
8553
8554 /* Initialize vector TARGET to VALS. */
8555
8556 void
8557 neon_expand_vector_init (rtx target, rtx vals)
8558 {
8559 enum machine_mode mode = GET_MODE (target);
8560 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8561 int n_elts = GET_MODE_NUNITS (mode);
8562 int n_var = 0, one_var = -1;
8563 bool all_same = true;
8564 rtx x, mem;
8565 int i;
8566
8567 for (i = 0; i < n_elts; ++i)
8568 {
8569 x = XVECEXP (vals, 0, i);
8570 if (!CONSTANT_P (x))
8571 ++n_var, one_var = i;
8572
8573 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8574 all_same = false;
8575 }
8576
8577 if (n_var == 0)
8578 {
8579 rtx constant = neon_make_constant (vals);
8580 if (constant != NULL_RTX)
8581 {
8582 emit_move_insn (target, constant);
8583 return;
8584 }
8585 }
8586
8587 /* Splat a single non-constant element if we can. */
8588 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8589 {
8590 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8591 emit_insn (gen_rtx_SET (VOIDmode, target,
8592 gen_rtx_VEC_DUPLICATE (mode, x)));
8593 return;
8594 }
8595
8596 /* One field is non-constant. Load constant then overwrite varying
8597 field. This is more efficient than using the stack. */
8598 if (n_var == 1)
8599 {
8600 rtx copy = copy_rtx (vals);
8601 rtx index = GEN_INT (one_var);
8602
8603 /* Load constant part of vector, substitute neighboring value for
8604 varying element. */
8605 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8606 neon_expand_vector_init (target, copy);
8607
8608 /* Insert variable. */
8609 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8610 switch (mode)
8611 {
8612 case V8QImode:
8613 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8614 break;
8615 case V16QImode:
8616 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8617 break;
8618 case V4HImode:
8619 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8620 break;
8621 case V8HImode:
8622 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8623 break;
8624 case V2SImode:
8625 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8626 break;
8627 case V4SImode:
8628 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8629 break;
8630 case V2SFmode:
8631 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8632 break;
8633 case V4SFmode:
8634 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8635 break;
8636 case V2DImode:
8637 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8638 break;
8639 default:
8640 gcc_unreachable ();
8641 }
8642 return;
8643 }
8644
8645 /* Construct the vector in memory one field at a time
8646 and load the whole vector. */
8647 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8648 for (i = 0; i < n_elts; i++)
8649 emit_move_insn (adjust_address_nv (mem, inner_mode,
8650 i * GET_MODE_SIZE (inner_mode)),
8651 XVECEXP (vals, 0, i));
8652 emit_move_insn (target, mem);
8653 }
8654
8655 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8656 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8657 reported source locations are bogus. */
8658
8659 static void
8660 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8661 const char *err)
8662 {
8663 HOST_WIDE_INT lane;
8664
8665 gcc_assert (GET_CODE (operand) == CONST_INT);
8666
8667 lane = INTVAL (operand);
8668
8669 if (lane < low || lane >= high)
8670 error (err);
8671 }
8672
8673 /* Bounds-check lanes. */
8674
8675 void
8676 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8677 {
8678 bounds_check (operand, low, high, "lane out of range");
8679 }
8680
8681 /* Bounds-check constants. */
8682
8683 void
8684 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8685 {
8686 bounds_check (operand, low, high, "constant out of range");
8687 }
8688
8689 HOST_WIDE_INT
8690 neon_element_bits (enum machine_mode mode)
8691 {
8692 if (mode == DImode)
8693 return GET_MODE_BITSIZE (mode);
8694 else
8695 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8696 }
8697
8698 \f
8699 /* Predicates for `match_operand' and `match_operator'. */
8700
8701 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8702 int
8703 cirrus_memory_offset (rtx op)
8704 {
8705 /* Reject eliminable registers. */
8706 if (! (reload_in_progress || reload_completed)
8707 && ( reg_mentioned_p (frame_pointer_rtx, op)
8708 || reg_mentioned_p (arg_pointer_rtx, op)
8709 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8710 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8711 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8712 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8713 return 0;
8714
8715 if (GET_CODE (op) == MEM)
8716 {
8717 rtx ind;
8718
8719 ind = XEXP (op, 0);
8720
8721 /* Match: (mem (reg)). */
8722 if (GET_CODE (ind) == REG)
8723 return 1;
8724
8725 /* Match:
8726 (mem (plus (reg)
8727 (const))). */
8728 if (GET_CODE (ind) == PLUS
8729 && GET_CODE (XEXP (ind, 0)) == REG
8730 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8731 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8732 return 1;
8733 }
8734
8735 return 0;
8736 }
8737
8738 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8739 WB is true if full writeback address modes are allowed and is false
8740 if limited writeback address modes (POST_INC and PRE_DEC) are
8741 allowed. */
8742
8743 int
8744 arm_coproc_mem_operand (rtx op, bool wb)
8745 {
8746 rtx ind;
8747
8748 /* Reject eliminable registers. */
8749 if (! (reload_in_progress || reload_completed)
8750 && ( reg_mentioned_p (frame_pointer_rtx, op)
8751 || reg_mentioned_p (arg_pointer_rtx, op)
8752 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8753 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8754 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8755 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8756 return FALSE;
8757
8758 /* Constants are converted into offsets from labels. */
8759 if (GET_CODE (op) != MEM)
8760 return FALSE;
8761
8762 ind = XEXP (op, 0);
8763
8764 if (reload_completed
8765 && (GET_CODE (ind) == LABEL_REF
8766 || (GET_CODE (ind) == CONST
8767 && GET_CODE (XEXP (ind, 0)) == PLUS
8768 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8769 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8770 return TRUE;
8771
8772 /* Match: (mem (reg)). */
8773 if (GET_CODE (ind) == REG)
8774 return arm_address_register_rtx_p (ind, 0);
8775
8776 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8777 acceptable in any case (subject to verification by
8778 arm_address_register_rtx_p). We need WB to be true to accept
8779 PRE_INC and POST_DEC. */
8780 if (GET_CODE (ind) == POST_INC
8781 || GET_CODE (ind) == PRE_DEC
8782 || (wb
8783 && (GET_CODE (ind) == PRE_INC
8784 || GET_CODE (ind) == POST_DEC)))
8785 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8786
8787 if (wb
8788 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8789 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8790 && GET_CODE (XEXP (ind, 1)) == PLUS
8791 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8792 ind = XEXP (ind, 1);
8793
8794 /* Match:
8795 (plus (reg)
8796 (const)). */
8797 if (GET_CODE (ind) == PLUS
8798 && GET_CODE (XEXP (ind, 0)) == REG
8799 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8800 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8801 && INTVAL (XEXP (ind, 1)) > -1024
8802 && INTVAL (XEXP (ind, 1)) < 1024
8803 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8804 return TRUE;
8805
8806 return FALSE;
8807 }
8808
8809 /* Return TRUE if OP is a memory operand which we can load or store a vector
8810 to/from. TYPE is one of the following values:
8811 0 - Vector load/store (vldr)
8812 1 - Core registers (ldm)
8813 2 - Element/structure loads (vld1)
8814 */
8815 int
8816 neon_vector_mem_operand (rtx op, int type)
8817 {
8818 rtx ind;
8819
8820 /* Reject eliminable registers. */
8821 if (! (reload_in_progress || reload_completed)
8822 && ( reg_mentioned_p (frame_pointer_rtx, op)
8823 || reg_mentioned_p (arg_pointer_rtx, op)
8824 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8825 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8826 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8827 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8828 return FALSE;
8829
8830 /* Constants are converted into offsets from labels. */
8831 if (GET_CODE (op) != MEM)
8832 return FALSE;
8833
8834 ind = XEXP (op, 0);
8835
8836 if (reload_completed
8837 && (GET_CODE (ind) == LABEL_REF
8838 || (GET_CODE (ind) == CONST
8839 && GET_CODE (XEXP (ind, 0)) == PLUS
8840 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8841 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8842 return TRUE;
8843
8844 /* Match: (mem (reg)). */
8845 if (GET_CODE (ind) == REG)
8846 return arm_address_register_rtx_p (ind, 0);
8847
8848 /* Allow post-increment with Neon registers. */
8849 if ((type != 1 && GET_CODE (ind) == POST_INC)
8850 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8851 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8852
8853 /* FIXME: vld1 allows register post-modify. */
8854
8855 /* Match:
8856 (plus (reg)
8857 (const)). */
8858 if (type == 0
8859 && GET_CODE (ind) == PLUS
8860 && GET_CODE (XEXP (ind, 0)) == REG
8861 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8862 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8863 && INTVAL (XEXP (ind, 1)) > -1024
8864 && INTVAL (XEXP (ind, 1)) < 1016
8865 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8866 return TRUE;
8867
8868 return FALSE;
8869 }
8870
8871 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8872 type. */
8873 int
8874 neon_struct_mem_operand (rtx op)
8875 {
8876 rtx ind;
8877
8878 /* Reject eliminable registers. */
8879 if (! (reload_in_progress || reload_completed)
8880 && ( reg_mentioned_p (frame_pointer_rtx, op)
8881 || reg_mentioned_p (arg_pointer_rtx, op)
8882 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8883 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8884 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8885 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8886 return FALSE;
8887
8888 /* Constants are converted into offsets from labels. */
8889 if (GET_CODE (op) != MEM)
8890 return FALSE;
8891
8892 ind = XEXP (op, 0);
8893
8894 if (reload_completed
8895 && (GET_CODE (ind) == LABEL_REF
8896 || (GET_CODE (ind) == CONST
8897 && GET_CODE (XEXP (ind, 0)) == PLUS
8898 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8899 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8900 return TRUE;
8901
8902 /* Match: (mem (reg)). */
8903 if (GET_CODE (ind) == REG)
8904 return arm_address_register_rtx_p (ind, 0);
8905
8906 return FALSE;
8907 }
8908
8909 /* Return true if X is a register that will be eliminated later on. */
8910 int
8911 arm_eliminable_register (rtx x)
8912 {
8913 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8914 || REGNO (x) == ARG_POINTER_REGNUM
8915 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8916 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8917 }
8918
8919 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8920 coprocessor registers. Otherwise return NO_REGS. */
8921
8922 enum reg_class
8923 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8924 {
8925 if (mode == HFmode)
8926 {
8927 if (!TARGET_NEON_FP16)
8928 return GENERAL_REGS;
8929 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8930 return NO_REGS;
8931 return GENERAL_REGS;
8932 }
8933
8934 if (TARGET_NEON
8935 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8936 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8937 && neon_vector_mem_operand (x, 0))
8938 return NO_REGS;
8939
8940 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8941 return NO_REGS;
8942
8943 return GENERAL_REGS;
8944 }
8945
8946 /* Values which must be returned in the most-significant end of the return
8947 register. */
8948
8949 static bool
8950 arm_return_in_msb (const_tree valtype)
8951 {
8952 return (TARGET_AAPCS_BASED
8953 && BYTES_BIG_ENDIAN
8954 && (AGGREGATE_TYPE_P (valtype)
8955 || TREE_CODE (valtype) == COMPLEX_TYPE));
8956 }
8957
8958 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8959 Used by the Cirrus Maverick code, which has to work around
8960 a hardware bug triggered by such instructions. */
8961 static bool
8962 arm_memory_load_p (rtx insn)
8963 {
8964 rtx body, lhs, rhs;
8965
8966 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8967 return false;
8968
8969 body = PATTERN (insn);
8970
8971 if (GET_CODE (body) != SET)
8972 return false;
8973
8974 lhs = XEXP (body, 0);
8975 rhs = XEXP (body, 1);
8976
8977 lhs = REG_OR_SUBREG_RTX (lhs);
8978
8979 /* If the destination is not a general purpose
8980 register we do not have to worry. */
8981 if (GET_CODE (lhs) != REG
8982 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8983 return false;
8984
8985 /* As well as loads from memory we also have to react
8986 to loads of invalid constants which will be turned
8987 into loads from the minipool. */
8988 return (GET_CODE (rhs) == MEM
8989 || GET_CODE (rhs) == SYMBOL_REF
8990 || note_invalid_constants (insn, -1, false));
8991 }
8992
8993 /* Return TRUE if INSN is a Cirrus instruction. */
8994 static bool
8995 arm_cirrus_insn_p (rtx insn)
8996 {
8997 enum attr_cirrus attr;
8998
8999 /* get_attr cannot accept USE or CLOBBER. */
9000 if (!insn
9001 || GET_CODE (insn) != INSN
9002 || GET_CODE (PATTERN (insn)) == USE
9003 || GET_CODE (PATTERN (insn)) == CLOBBER)
9004 return 0;
9005
9006 attr = get_attr_cirrus (insn);
9007
9008 return attr != CIRRUS_NOT;
9009 }
9010
9011 /* Cirrus reorg for invalid instruction combinations. */
9012 static void
9013 cirrus_reorg (rtx first)
9014 {
9015 enum attr_cirrus attr;
9016 rtx body = PATTERN (first);
9017 rtx t;
9018 int nops;
9019
9020 /* Any branch must be followed by 2 non-Cirrus instructions. */
9021 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9022 {
9023 nops = 0;
9024 t = next_nonnote_insn (first);
9025
9026 if (arm_cirrus_insn_p (t))
9027 ++ nops;
9028
9029 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9030 ++ nops;
9031
9032 while (nops --)
9033 emit_insn_after (gen_nop (), first);
9034
9035 return;
9036 }
9037
9038 /* (float (blah)) is in parallel with a clobber. */
9039 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9040 body = XVECEXP (body, 0, 0);
9041
9042 if (GET_CODE (body) == SET)
9043 {
9044 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9045
9046 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9047 be followed by a non-Cirrus insn. */
9048 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9049 {
9050 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9051 emit_insn_after (gen_nop (), first);
9052
9053 return;
9054 }
9055 else if (arm_memory_load_p (first))
9056 {
9057 unsigned int arm_regno;
9058
9059 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9060 ldr/cfmv64hr combination where the Rd field is the same
9061 in both instructions must be split with a non-Cirrus
9062 insn. Example:
9063
9064 ldr r0, blah
9065 nop
9066 cfmvsr mvf0, r0. */
9067
9068 /* Get Arm register number for ldr insn. */
9069 if (GET_CODE (lhs) == REG)
9070 arm_regno = REGNO (lhs);
9071 else
9072 {
9073 gcc_assert (GET_CODE (rhs) == REG);
9074 arm_regno = REGNO (rhs);
9075 }
9076
9077 /* Next insn. */
9078 first = next_nonnote_insn (first);
9079
9080 if (! arm_cirrus_insn_p (first))
9081 return;
9082
9083 body = PATTERN (first);
9084
9085 /* (float (blah)) is in parallel with a clobber. */
9086 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9087 body = XVECEXP (body, 0, 0);
9088
9089 if (GET_CODE (body) == FLOAT)
9090 body = XEXP (body, 0);
9091
9092 if (get_attr_cirrus (first) == CIRRUS_MOVE
9093 && GET_CODE (XEXP (body, 1)) == REG
9094 && arm_regno == REGNO (XEXP (body, 1)))
9095 emit_insn_after (gen_nop (), first);
9096
9097 return;
9098 }
9099 }
9100
9101 /* get_attr cannot accept USE or CLOBBER. */
9102 if (!first
9103 || GET_CODE (first) != INSN
9104 || GET_CODE (PATTERN (first)) == USE
9105 || GET_CODE (PATTERN (first)) == CLOBBER)
9106 return;
9107
9108 attr = get_attr_cirrus (first);
9109
9110 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9111 must be followed by a non-coprocessor instruction. */
9112 if (attr == CIRRUS_COMPARE)
9113 {
9114 nops = 0;
9115
9116 t = next_nonnote_insn (first);
9117
9118 if (arm_cirrus_insn_p (t))
9119 ++ nops;
9120
9121 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9122 ++ nops;
9123
9124 while (nops --)
9125 emit_insn_after (gen_nop (), first);
9126
9127 return;
9128 }
9129 }
9130
9131 /* Return TRUE if X references a SYMBOL_REF. */
9132 int
9133 symbol_mentioned_p (rtx x)
9134 {
9135 const char * fmt;
9136 int i;
9137
9138 if (GET_CODE (x) == SYMBOL_REF)
9139 return 1;
9140
9141 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9142 are constant offsets, not symbols. */
9143 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9144 return 0;
9145
9146 fmt = GET_RTX_FORMAT (GET_CODE (x));
9147
9148 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9149 {
9150 if (fmt[i] == 'E')
9151 {
9152 int j;
9153
9154 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9155 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9156 return 1;
9157 }
9158 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9159 return 1;
9160 }
9161
9162 return 0;
9163 }
9164
9165 /* Return TRUE if X references a LABEL_REF. */
9166 int
9167 label_mentioned_p (rtx x)
9168 {
9169 const char * fmt;
9170 int i;
9171
9172 if (GET_CODE (x) == LABEL_REF)
9173 return 1;
9174
9175 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9176 instruction, but they are constant offsets, not symbols. */
9177 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9178 return 0;
9179
9180 fmt = GET_RTX_FORMAT (GET_CODE (x));
9181 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9182 {
9183 if (fmt[i] == 'E')
9184 {
9185 int j;
9186
9187 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9188 if (label_mentioned_p (XVECEXP (x, i, j)))
9189 return 1;
9190 }
9191 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9192 return 1;
9193 }
9194
9195 return 0;
9196 }
9197
9198 int
9199 tls_mentioned_p (rtx x)
9200 {
9201 switch (GET_CODE (x))
9202 {
9203 case CONST:
9204 return tls_mentioned_p (XEXP (x, 0));
9205
9206 case UNSPEC:
9207 if (XINT (x, 1) == UNSPEC_TLS)
9208 return 1;
9209
9210 default:
9211 return 0;
9212 }
9213 }
9214
9215 /* Must not copy any rtx that uses a pc-relative address. */
9216
9217 static int
9218 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9219 {
9220 if (GET_CODE (*x) == UNSPEC
9221 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9222 return 1;
9223 return 0;
9224 }
9225
9226 static bool
9227 arm_cannot_copy_insn_p (rtx insn)
9228 {
9229 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9230 }
9231
9232 enum rtx_code
9233 minmax_code (rtx x)
9234 {
9235 enum rtx_code code = GET_CODE (x);
9236
9237 switch (code)
9238 {
9239 case SMAX:
9240 return GE;
9241 case SMIN:
9242 return LE;
9243 case UMIN:
9244 return LEU;
9245 case UMAX:
9246 return GEU;
9247 default:
9248 gcc_unreachable ();
9249 }
9250 }
9251
9252 /* Return 1 if memory locations are adjacent. */
9253 int
9254 adjacent_mem_locations (rtx a, rtx b)
9255 {
9256 /* We don't guarantee to preserve the order of these memory refs. */
9257 if (volatile_refs_p (a) || volatile_refs_p (b))
9258 return 0;
9259
9260 if ((GET_CODE (XEXP (a, 0)) == REG
9261 || (GET_CODE (XEXP (a, 0)) == PLUS
9262 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9263 && (GET_CODE (XEXP (b, 0)) == REG
9264 || (GET_CODE (XEXP (b, 0)) == PLUS
9265 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9266 {
9267 HOST_WIDE_INT val0 = 0, val1 = 0;
9268 rtx reg0, reg1;
9269 int val_diff;
9270
9271 if (GET_CODE (XEXP (a, 0)) == PLUS)
9272 {
9273 reg0 = XEXP (XEXP (a, 0), 0);
9274 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9275 }
9276 else
9277 reg0 = XEXP (a, 0);
9278
9279 if (GET_CODE (XEXP (b, 0)) == PLUS)
9280 {
9281 reg1 = XEXP (XEXP (b, 0), 0);
9282 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9283 }
9284 else
9285 reg1 = XEXP (b, 0);
9286
9287 /* Don't accept any offset that will require multiple
9288 instructions to handle, since this would cause the
9289 arith_adjacentmem pattern to output an overlong sequence. */
9290 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9291 return 0;
9292
9293 /* Don't allow an eliminable register: register elimination can make
9294 the offset too large. */
9295 if (arm_eliminable_register (reg0))
9296 return 0;
9297
9298 val_diff = val1 - val0;
9299
9300 if (arm_ld_sched)
9301 {
9302 	  /* If the target has load delay slots, then there's no benefit
9303 	     to using an ldm instruction unless one of the offsets is zero
9304 	     or four and we are optimizing for size.  */
9305 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9306 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9307 && (val_diff == 4 || val_diff == -4));
9308 }
9309
9310 return ((REGNO (reg0) == REGNO (reg1))
9311 && (val_diff == 4 || val_diff == -4));
9312 }
9313
9314 return 0;
9315 }
9316
9317 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9318 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9319 instruction. ADD_OFFSET is nonzero if the base address register needs
9320 to be modified with an add instruction before we can use it. */
9321
9322 static bool
9323 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9324 int nops, HOST_WIDE_INT add_offset)
9325 {
9326 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9327 if the offset isn't small enough. The reason 2 ldrs are faster
9328 is because these ARMs are able to do more than one cache access
9329 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9330 whilst the ARM8 has a double bandwidth cache. This means that
9331 these cores can do both an instruction fetch and a data fetch in
9332 a single cycle, so the trick of calculating the address into a
9333 scratch register (one of the result regs) and then doing a load
9334 multiple actually becomes slower (and no smaller in code size).
9335 That is the transformation
9336
9337 ldr rd1, [rbase + offset]
9338 ldr rd2, [rbase + offset + 4]
9339
9340 to
9341
9342 add rd1, rbase, offset
9343 ldmia rd1, {rd1, rd2}
9344
9345 produces worse code -- '3 cycles + any stalls on rd2' instead of
9346 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9347 access per cycle, the first sequence could never complete in less
9348 than 6 cycles, whereas the ldm sequence would only take 5 and
9349 would make better use of sequential accesses if not hitting the
9350 cache.
9351
9352 We cheat here and test 'arm_ld_sched' which we currently know to
9353 only be true for the ARM8, ARM9 and StrongARM. If this ever
9354 changes, then the test below needs to be reworked. */
9355 if (nops == 2 && arm_ld_sched && add_offset != 0)
9356 return false;
9357
9358 /* XScale has load-store double instructions, but they have stricter
9359 alignment requirements than load-store multiple, so we cannot
9360 use them.
9361
9362 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9363 the pipeline until completion.
9364
9365 NREGS CYCLES
9366 1 3
9367 2 4
9368 3 5
9369 4 6
9370
9371 An ldr instruction takes 1-3 cycles, but does not block the
9372 pipeline.
9373
9374 NREGS CYCLES
9375 1 1-3
9376 2 2-6
9377 3 3-9
9378 4 4-12
9379
9380 Best case ldr will always win. However, the more ldr instructions
9381 we issue, the less likely we are to be able to schedule them well.
9382 Using ldr instructions also increases code size.
9383
9384 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9385 for counts of 3 or 4 regs. */
9386 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9387 return false;
9388 return true;
9389 }
9390
9391 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9392    Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9393    an array ORDER describing the sequence in which to access the offsets
9394    so that they are visited in ascending order.  In this sequence, each
9395 offset must be larger by exactly 4 than the previous one. ORDER[0]
9396 must have been filled in with the lowest offset by the caller.
9397 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9398 we use to verify that ORDER produces an ascending order of registers.
9399 Return true if it was possible to construct such an order, false if
9400 not. */
9401
9402 static bool
9403 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9404 int *unsorted_regs)
9405 {
9406 int i;
9407 for (i = 1; i < nops; i++)
9408 {
9409 int j;
9410
9411 order[i] = order[i - 1];
9412 for (j = 0; j < nops; j++)
9413 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9414 {
9415 /* We must find exactly one offset that is higher than the
9416 previous one by 4. */
9417 if (order[i] != order[i - 1])
9418 return false;
9419 order[i] = j;
9420 }
9421 if (order[i] == order[i - 1])
9422 return false;
9423 /* The register numbers must be ascending. */
9424 if (unsorted_regs != NULL
9425 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9426 return false;
9427 }
9428 return true;
9429 }
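
/* As a purely illustrative example: with NOPS == 3,
   UNSORTED_OFFSETS == { 4, 12, 8 } and ORDER[0] == 0 supplied by the
   caller, the loop above produces ORDER == { 0, 2, 1 }, visiting the
   offsets as 4, 8, 12.  An input such as { 4, 16, 8 } fails, because
   no offset is exactly 4 greater than 8.  */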
9430
9431 /* Used to determine in a peephole whether a sequence of load
9432 instructions can be changed into a load-multiple instruction.
9433 NOPS is the number of separate load instructions we are examining. The
9434 first NOPS entries in OPERANDS are the destination registers, the
9435 next NOPS entries are memory operands. If this function is
9436 successful, *BASE is set to the common base register of the memory
9437 accesses; *LOAD_OFFSET is set to the first memory location's offset
9438 from that base register.
9439 REGS is an array filled in with the destination register numbers.
9440    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9441    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
9442 the sequence of registers in REGS matches the loads from ascending memory
9443 locations, and the function verifies that the register numbers are
9444 themselves ascending. If CHECK_REGS is false, the register numbers
9445 are stored in the order they are found in the operands. */
9446 static int
9447 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9448 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9449 {
9450 int unsorted_regs[MAX_LDM_STM_OPS];
9451 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9452 int order[MAX_LDM_STM_OPS];
9453 rtx base_reg_rtx = NULL;
9454 int base_reg = -1;
9455 int i, ldm_case;
9456
9457 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9458 easily extended if required. */
9459 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9460
9461 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9462
9463 /* Loop over the operands and check that the memory references are
9464 suitable (i.e. immediate offsets from the same base register). At
9465 the same time, extract the target register, and the memory
9466 offsets. */
9467 for (i = 0; i < nops; i++)
9468 {
9469 rtx reg;
9470 rtx offset;
9471
9472 /* Convert a subreg of a mem into the mem itself. */
9473 if (GET_CODE (operands[nops + i]) == SUBREG)
9474 operands[nops + i] = alter_subreg (operands + (nops + i));
9475
9476 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9477
9478 /* Don't reorder volatile memory references; it doesn't seem worth
9479 looking for the case where the order is ok anyway. */
9480 if (MEM_VOLATILE_P (operands[nops + i]))
9481 return 0;
9482
9483 offset = const0_rtx;
9484
9485 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9486 || (GET_CODE (reg) == SUBREG
9487 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9488 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9489 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9490 == REG)
9491 || (GET_CODE (reg) == SUBREG
9492 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9493 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9494 == CONST_INT)))
9495 {
9496 if (i == 0)
9497 {
9498 base_reg = REGNO (reg);
9499 base_reg_rtx = reg;
9500 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9501 return 0;
9502 }
9503 else if (base_reg != (int) REGNO (reg))
9504 /* Not addressed from the same base register. */
9505 return 0;
9506
9507 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9508 ? REGNO (operands[i])
9509 : REGNO (SUBREG_REG (operands[i])));
9510
9511 /* If it isn't an integer register, or if it overwrites the
9512 base register but isn't the last insn in the list, then
9513 we can't do this. */
9514 if (unsorted_regs[i] < 0
9515 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9516 || unsorted_regs[i] > 14
9517 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9518 return 0;
9519
9520 unsorted_offsets[i] = INTVAL (offset);
9521 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9522 order[0] = i;
9523 }
9524 else
9525 /* Not a suitable memory address. */
9526 return 0;
9527 }
9528
9529 /* All the useful information has now been extracted from the
9530 operands into unsorted_regs and unsorted_offsets; additionally,
9531 order[0] has been set to the lowest offset in the list. Sort
9532 the offsets into order, verifying that they are adjacent, and
9533 check that the register numbers are ascending. */
9534 if (!compute_offset_order (nops, unsorted_offsets, order,
9535 check_regs ? unsorted_regs : NULL))
9536 return 0;
9537
9538 if (saved_order)
9539 memcpy (saved_order, order, sizeof order);
9540
9541 if (base)
9542 {
9543 *base = base_reg;
9544
9545 for (i = 0; i < nops; i++)
9546 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9547
9548 *load_offset = unsorted_offsets[order[0]];
9549 }
9550
9551 if (TARGET_THUMB1
9552 && !peep2_reg_dead_p (nops, base_reg_rtx))
9553 return 0;
9554
9555 if (unsorted_offsets[order[0]] == 0)
9556 ldm_case = 1; /* ldmia */
9557 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9558 ldm_case = 2; /* ldmib */
9559 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9560 ldm_case = 3; /* ldmda */
9561 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9562 ldm_case = 4; /* ldmdb */
9563 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9564 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9565 ldm_case = 5;
9566 else
9567 return 0;
9568
9569 if (!multiple_operation_profitable_p (false, nops,
9570 ldm_case == 5
9571 ? unsorted_offsets[order[0]] : 0))
9572 return 0;
9573
9574 return ldm_case;
9575 }
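
/* As a hypothetical illustration (register numbers chosen arbitrarily),
   a peephole sequence such as

	ldr	r1, [r5, #4]
	ldr	r2, [r5, #8]
	ldr	r3, [r5, #12]

   is recognized with *BASE == 5, *LOAD_OFFSET == 4, REGS == { 1, 2, 3 }
   and, in ARM state, a return value of 2 (the ldmib case), since the
   lowest offset is 4.  */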
9576
9577 /* Used to determine in a peephole whether a sequence of store instructions can
9578 be changed into a store-multiple instruction.
9579 NOPS is the number of separate store instructions we are examining.
9580 NOPS_TOTAL is the total number of instructions recognized by the peephole
9581 pattern.
9582 The first NOPS entries in OPERANDS are the source registers, the next
9583 NOPS entries are memory operands. If this function is successful, *BASE is
9584 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9585 to the first memory location's offset from that base register. REGS is an
9586 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9587 likewise filled with the corresponding rtx's.
9588    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9589    numbers to an ascending order of stores.
9590 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9591 from ascending memory locations, and the function verifies that the register
9592 numbers are themselves ascending. If CHECK_REGS is false, the register
9593 numbers are stored in the order they are found in the operands. */
9594 static int
9595 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9596 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9597 HOST_WIDE_INT *load_offset, bool check_regs)
9598 {
9599 int unsorted_regs[MAX_LDM_STM_OPS];
9600 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9601 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9602 int order[MAX_LDM_STM_OPS];
9603 int base_reg = -1;
9604 rtx base_reg_rtx = NULL;
9605 int i, stm_case;
9606
9607 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9608 easily extended if required. */
9609 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9610
9611 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9612
9613 /* Loop over the operands and check that the memory references are
9614 suitable (i.e. immediate offsets from the same base register). At
9615 the same time, extract the target register, and the memory
9616 offsets. */
9617 for (i = 0; i < nops; i++)
9618 {
9619 rtx reg;
9620 rtx offset;
9621
9622 /* Convert a subreg of a mem into the mem itself. */
9623 if (GET_CODE (operands[nops + i]) == SUBREG)
9624 operands[nops + i] = alter_subreg (operands + (nops + i));
9625
9626 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9627
9628 /* Don't reorder volatile memory references; it doesn't seem worth
9629 looking for the case where the order is ok anyway. */
9630 if (MEM_VOLATILE_P (operands[nops + i]))
9631 return 0;
9632
9633 offset = const0_rtx;
9634
9635 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9636 || (GET_CODE (reg) == SUBREG
9637 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9638 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9639 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9640 == REG)
9641 || (GET_CODE (reg) == SUBREG
9642 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9643 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9644 == CONST_INT)))
9645 {
9646 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9647 ? operands[i] : SUBREG_REG (operands[i]));
9648 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9649
9650 if (i == 0)
9651 {
9652 base_reg = REGNO (reg);
9653 base_reg_rtx = reg;
9654 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9655 return 0;
9656 }
9657 else if (base_reg != (int) REGNO (reg))
9658 /* Not addressed from the same base register. */
9659 return 0;
9660
9661 /* If it isn't an integer register, then we can't do this. */
9662 if (unsorted_regs[i] < 0
9663 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9664 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9665 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9666 || unsorted_regs[i] > 14)
9667 return 0;
9668
9669 unsorted_offsets[i] = INTVAL (offset);
9670 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9671 order[0] = i;
9672 }
9673 else
9674 /* Not a suitable memory address. */
9675 return 0;
9676 }
9677
9678 /* All the useful information has now been extracted from the
9679 operands into unsorted_regs and unsorted_offsets; additionally,
9680 order[0] has been set to the lowest offset in the list. Sort
9681 the offsets into order, verifying that they are adjacent, and
9682 check that the register numbers are ascending. */
9683 if (!compute_offset_order (nops, unsorted_offsets, order,
9684 check_regs ? unsorted_regs : NULL))
9685 return 0;
9686
9687 if (saved_order)
9688 memcpy (saved_order, order, sizeof order);
9689
9690 if (base)
9691 {
9692 *base = base_reg;
9693
9694 for (i = 0; i < nops; i++)
9695 {
9696 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9697 if (reg_rtxs)
9698 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9699 }
9700
9701 *load_offset = unsorted_offsets[order[0]];
9702 }
9703
9704 if (TARGET_THUMB1
9705 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9706 return 0;
9707
9708 if (unsorted_offsets[order[0]] == 0)
9709 stm_case = 1; /* stmia */
9710 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9711 stm_case = 2; /* stmib */
9712 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9713 stm_case = 3; /* stmda */
9714 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9715 stm_case = 4; /* stmdb */
9716 else
9717 return 0;
9718
9719 if (!multiple_operation_profitable_p (false, nops, 0))
9720 return 0;
9721
9722 return stm_case;
9723 }
9724 \f
9725 /* Routines for use in generating RTL. */
9726
9727 /* Generate a load-multiple instruction. COUNT is the number of loads in
9728 the instruction; REGS and MEMS are arrays containing the operands.
9729 BASEREG is the base register to be used in addressing the memory operands.
9730 WBACK_OFFSET is nonzero if the instruction should update the base
9731 register. */
9732
9733 static rtx
9734 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9735 HOST_WIDE_INT wback_offset)
9736 {
9737 int i = 0, j;
9738 rtx result;
9739
9740 if (!multiple_operation_profitable_p (false, count, 0))
9741 {
9742 rtx seq;
9743
9744 start_sequence ();
9745
9746 for (i = 0; i < count; i++)
9747 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9748
9749 if (wback_offset != 0)
9750 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9751
9752 seq = get_insns ();
9753 end_sequence ();
9754
9755 return seq;
9756 }
9757
9758 result = gen_rtx_PARALLEL (VOIDmode,
9759 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9760 if (wback_offset != 0)
9761 {
9762 XVECEXP (result, 0, 0)
9763 = gen_rtx_SET (VOIDmode, basereg,
9764 plus_constant (basereg, wback_offset));
9765 i = 1;
9766 count++;
9767 }
9768
9769 for (j = 0; i < count; i++, j++)
9770 XVECEXP (result, 0, i)
9771 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9772
9773 return result;
9774 }
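
/* For instance (values chosen only for illustration), COUNT == 2,
   REGS == { 4, 5 }, WBACK_OFFSET == 8 and BASEREG r0, when the
   combination is judged profitable, yield a PARALLEL of the form

	(parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
		   (set (reg:SI 4) (mem:SI ...))
		   (set (reg:SI 5) (mem:SI ...))])

   whereas an unprofitable combination is emitted as a plain sequence
   of individual moves instead.  */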
9775
9776 /* Generate a store-multiple instruction. COUNT is the number of stores in
9777 the instruction; REGS and MEMS are arrays containing the operands.
9778 BASEREG is the base register to be used in addressing the memory operands.
9779 WBACK_OFFSET is nonzero if the instruction should update the base
9780 register. */
9781
9782 static rtx
9783 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9784 HOST_WIDE_INT wback_offset)
9785 {
9786 int i = 0, j;
9787 rtx result;
9788
9789 if (GET_CODE (basereg) == PLUS)
9790 basereg = XEXP (basereg, 0);
9791
9792 if (!multiple_operation_profitable_p (false, count, 0))
9793 {
9794 rtx seq;
9795
9796 start_sequence ();
9797
9798 for (i = 0; i < count; i++)
9799 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9800
9801 if (wback_offset != 0)
9802 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9803
9804 seq = get_insns ();
9805 end_sequence ();
9806
9807 return seq;
9808 }
9809
9810 result = gen_rtx_PARALLEL (VOIDmode,
9811 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9812 if (wback_offset != 0)
9813 {
9814 XVECEXP (result, 0, 0)
9815 = gen_rtx_SET (VOIDmode, basereg,
9816 plus_constant (basereg, wback_offset));
9817 i = 1;
9818 count++;
9819 }
9820
9821 for (j = 0; i < count; i++, j++)
9822 XVECEXP (result, 0, i)
9823 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9824
9825 return result;
9826 }
9827
9828 /* Generate either a load-multiple or a store-multiple instruction. This
9829 function can be used in situations where we can start with a single MEM
9830 rtx and adjust its address upwards.
9831 COUNT is the number of operations in the instruction, not counting a
9832 possible update of the base register. REGS is an array containing the
9833 register operands.
9834 BASEREG is the base register to be used in addressing the memory operands,
9835 which are constructed from BASEMEM.
9836 WRITE_BACK specifies whether the generated instruction should include an
9837 update of the base register.
9838 OFFSETP is used to pass an offset to and from this function; this offset
9839 is not used when constructing the address (instead BASEMEM should have an
9840 appropriate offset in its address), it is used only for setting
9841 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
9842
9843 static rtx
9844 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9845 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9846 {
9847 rtx mems[MAX_LDM_STM_OPS];
9848 HOST_WIDE_INT offset = *offsetp;
9849 int i;
9850
9851 gcc_assert (count <= MAX_LDM_STM_OPS);
9852
9853 if (GET_CODE (basereg) == PLUS)
9854 basereg = XEXP (basereg, 0);
9855
9856 for (i = 0; i < count; i++)
9857 {
9858 rtx addr = plus_constant (basereg, i * 4);
9859 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9860 offset += 4;
9861 }
9862
9863 if (write_back)
9864 *offsetp = offset;
9865
9866 if (is_load)
9867 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9868 write_back ? 4 * count : 0);
9869 else
9870 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9871 write_back ? 4 * count : 0);
9872 }
9873
9874 rtx
9875 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9876 rtx basemem, HOST_WIDE_INT *offsetp)
9877 {
9878 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9879 offsetp);
9880 }
9881
9882 rtx
9883 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9884 rtx basemem, HOST_WIDE_INT *offsetp)
9885 {
9886 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9887 offsetp);
9888 }
9889
9890 /* Called from a peephole2 expander to turn a sequence of loads into an
9891 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9892 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9893 is true if we can reorder the registers because they are used commutatively
9894 subsequently.
9895 Returns true iff we could generate a new instruction. */
9896
9897 bool
9898 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9899 {
9900 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9901 rtx mems[MAX_LDM_STM_OPS];
9902 int i, j, base_reg;
9903 rtx base_reg_rtx;
9904 HOST_WIDE_INT offset;
9905 int write_back = FALSE;
9906 int ldm_case;
9907 rtx addr;
9908
9909 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9910 &base_reg, &offset, !sort_regs);
9911
9912 if (ldm_case == 0)
9913 return false;
9914
9915 if (sort_regs)
9916 for (i = 0; i < nops - 1; i++)
9917 for (j = i + 1; j < nops; j++)
9918 if (regs[i] > regs[j])
9919 {
9920 int t = regs[i];
9921 regs[i] = regs[j];
9922 regs[j] = t;
9923 }
9924 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9925
9926 if (TARGET_THUMB1)
9927 {
9928 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9929 gcc_assert (ldm_case == 1 || ldm_case == 5);
9930 write_back = TRUE;
9931 }
9932
9933 if (ldm_case == 5)
9934 {
9935 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
9936 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
9937 offset = 0;
9938 if (!TARGET_THUMB1)
9939 {
9940 base_reg = regs[0];
9941 base_reg_rtx = newbase;
9942 }
9943 }
9944
9945 for (i = 0; i < nops; i++)
9946 {
9947 addr = plus_constant (base_reg_rtx, offset + i * 4);
9948 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9949 SImode, addr, 0);
9950 }
9951 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
9952 write_back ? offset + i * 4 : 0));
9953 return true;
9954 }
9955
9956 /* Called from a peephole2 expander to turn a sequence of stores into an
9957 STM instruction. OPERANDS are the operands found by the peephole matcher;
9958 NOPS indicates how many separate stores we are trying to combine.
9959 Returns true iff we could generate a new instruction. */
9960
9961 bool
9962 gen_stm_seq (rtx *operands, int nops)
9963 {
9964 int i;
9965 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9966 rtx mems[MAX_LDM_STM_OPS];
9967 int base_reg;
9968 rtx base_reg_rtx;
9969 HOST_WIDE_INT offset;
9970 int write_back = FALSE;
9971 int stm_case;
9972 rtx addr;
9973 bool base_reg_dies;
9974
9975 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
9976 mem_order, &base_reg, &offset, true);
9977
9978 if (stm_case == 0)
9979 return false;
9980
9981 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9982
9983 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
9984 if (TARGET_THUMB1)
9985 {
9986 gcc_assert (base_reg_dies);
9987 write_back = TRUE;
9988 }
9989
9990 if (stm_case == 5)
9991 {
9992 gcc_assert (base_reg_dies);
9993 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
9994 offset = 0;
9995 }
9996
9997 addr = plus_constant (base_reg_rtx, offset);
9998
9999 for (i = 0; i < nops; i++)
10000 {
10001 addr = plus_constant (base_reg_rtx, offset + i * 4);
10002 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10003 SImode, addr, 0);
10004 }
10005 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10006 write_back ? offset + i * 4 : 0));
10007 return true;
10008 }
10009
10010 /* Called from a peephole2 expander to turn a sequence of stores that are
10011 preceded by constant loads into an STM instruction. OPERANDS are the
10012 operands found by the peephole matcher; NOPS indicates how many
10013 separate stores we are trying to combine; there are 2 * NOPS
10014 instructions in the peephole.
10015 Returns true iff we could generate a new instruction. */
10016
10017 bool
10018 gen_const_stm_seq (rtx *operands, int nops)
10019 {
10020 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10021 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10022 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10023 rtx mems[MAX_LDM_STM_OPS];
10024 int base_reg;
10025 rtx base_reg_rtx;
10026 HOST_WIDE_INT offset;
10027 int write_back = FALSE;
10028 int stm_case;
10029 rtx addr;
10030 bool base_reg_dies;
10031 int i, j;
10032 HARD_REG_SET allocated;
10033
10034 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10035 mem_order, &base_reg, &offset, false);
10036
10037 if (stm_case == 0)
10038 return false;
10039
10040 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10041
10042 /* If the same register is used more than once, try to find a free
10043 register. */
10044 CLEAR_HARD_REG_SET (allocated);
10045 for (i = 0; i < nops; i++)
10046 {
10047 for (j = i + 1; j < nops; j++)
10048 if (regs[i] == regs[j])
10049 {
10050 rtx t = peep2_find_free_register (0, nops * 2,
10051 TARGET_THUMB1 ? "l" : "r",
10052 SImode, &allocated);
10053 if (t == NULL_RTX)
10054 return false;
10055 reg_rtxs[i] = t;
10056 regs[i] = REGNO (t);
10057 }
10058 }
10059
10060 /* Compute an ordering that maps the register numbers to an ascending
10061 sequence. */
10062 reg_order[0] = 0;
10063 for (i = 0; i < nops; i++)
10064 if (regs[i] < regs[reg_order[0]])
10065 reg_order[0] = i;
10066
10067 for (i = 1; i < nops; i++)
10068 {
10069 int this_order = reg_order[i - 1];
10070 for (j = 0; j < nops; j++)
10071 if (regs[j] > regs[reg_order[i - 1]]
10072 && (this_order == reg_order[i - 1]
10073 || regs[j] < regs[this_order]))
10074 this_order = j;
10075 reg_order[i] = this_order;
10076 }
10077
10078 /* Ensure that registers that must be live after the instruction end
10079 up with the correct value. */
10080 for (i = 0; i < nops; i++)
10081 {
10082 int this_order = reg_order[i];
10083 if ((this_order != mem_order[i]
10084 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10085 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10086 return false;
10087 }
10088
10089 /* Load the constants. */
10090 for (i = 0; i < nops; i++)
10091 {
10092 rtx op = operands[2 * nops + mem_order[i]];
10093 sorted_regs[i] = regs[reg_order[i]];
10094 emit_move_insn (reg_rtxs[reg_order[i]], op);
10095 }
10096
10097 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10098
10099 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10100 if (TARGET_THUMB1)
10101 {
10102 gcc_assert (base_reg_dies);
10103 write_back = TRUE;
10104 }
10105
10106 if (stm_case == 5)
10107 {
10108 gcc_assert (base_reg_dies);
10109 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10110 offset = 0;
10111 }
10112
10113 addr = plus_constant (base_reg_rtx, offset);
10114
10115 for (i = 0; i < nops; i++)
10116 {
10117 addr = plus_constant (base_reg_rtx, offset + i * 4);
10118 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10119 SImode, addr, 0);
10120 }
10121 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10122 write_back ? offset + i * 4 : 0));
10123 return true;
10124 }
10125
10126 int
10127 arm_gen_movmemqi (rtx *operands)
10128 {
10129 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10130 HOST_WIDE_INT srcoffset, dstoffset;
10131 int i;
10132 rtx src, dst, srcbase, dstbase;
10133 rtx part_bytes_reg = NULL;
10134 rtx mem;
10135
10136 if (GET_CODE (operands[2]) != CONST_INT
10137 || GET_CODE (operands[3]) != CONST_INT
10138 || INTVAL (operands[2]) > 64
10139 || INTVAL (operands[3]) & 3)
10140 return 0;
10141
10142 dstbase = operands[0];
10143 srcbase = operands[1];
10144
10145 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10146 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10147
10148 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10149 out_words_to_go = INTVAL (operands[2]) / 4;
10150 last_bytes = INTVAL (operands[2]) & 3;
10151 dstoffset = srcoffset = 0;
10152
10153 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10154 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10155
10156 for (i = 0; in_words_to_go >= 2; i+=4)
10157 {
10158 if (in_words_to_go > 4)
10159 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10160 TRUE, srcbase, &srcoffset));
10161 else
10162 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10163 src, FALSE, srcbase,
10164 &srcoffset));
10165
10166 if (out_words_to_go)
10167 {
10168 if (out_words_to_go > 4)
10169 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10170 TRUE, dstbase, &dstoffset));
10171 else if (out_words_to_go != 1)
10172 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10173 out_words_to_go, dst,
10174 (last_bytes == 0
10175 ? FALSE : TRUE),
10176 dstbase, &dstoffset));
10177 else
10178 {
10179 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10180 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10181 if (last_bytes != 0)
10182 {
10183 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10184 dstoffset += 4;
10185 }
10186 }
10187 }
10188
10189 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10190 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10191 }
10192
10193 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10194 if (out_words_to_go)
10195 {
10196 rtx sreg;
10197
10198 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10199 sreg = copy_to_reg (mem);
10200
10201 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10202 emit_move_insn (mem, sreg);
10203 in_words_to_go--;
10204
10205 gcc_assert (!in_words_to_go); /* Sanity check */
10206 }
10207
10208 if (in_words_to_go)
10209 {
10210 gcc_assert (in_words_to_go > 0);
10211
10212 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10213 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10214 }
10215
10216 gcc_assert (!last_bytes || part_bytes_reg);
10217
10218 if (BYTES_BIG_ENDIAN && last_bytes)
10219 {
10220 rtx tmp = gen_reg_rtx (SImode);
10221
10222 /* The bytes we want are in the top end of the word. */
10223 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10224 GEN_INT (8 * (4 - last_bytes))));
10225 part_bytes_reg = tmp;
10226
10227 while (last_bytes)
10228 {
10229 mem = adjust_automodify_address (dstbase, QImode,
10230 plus_constant (dst, last_bytes - 1),
10231 dstoffset + last_bytes - 1);
10232 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10233
10234 if (--last_bytes)
10235 {
10236 tmp = gen_reg_rtx (SImode);
10237 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10238 part_bytes_reg = tmp;
10239 }
10240 }
10241
10242 }
10243 else
10244 {
10245 if (last_bytes > 1)
10246 {
10247 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10248 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10249 last_bytes -= 2;
10250 if (last_bytes)
10251 {
10252 rtx tmp = gen_reg_rtx (SImode);
10253 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10254 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10255 part_bytes_reg = tmp;
10256 dstoffset += 2;
10257 }
10258 }
10259
10260 if (last_bytes)
10261 {
10262 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10263 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10264 }
10265 }
10266
10267 return 1;
10268 }
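
/* As a worked example (with hypothetical operands): a copy of 14 bytes
   at word alignment gives in_words_to_go == 4 (ARM_NUM_INTS (14)),
   out_words_to_go == 3 and last_bytes == 2, so the expansion loads four
   words, stores three whole words, and finally stores the trailing two
   bytes as a halfword taken from the last loaded register (when not
   BYTES_BIG_ENDIAN).  */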
10269
10270 /* Select a dominance comparison mode if possible for a test of the general
10271 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10272 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10273 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10274 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10275 In all cases OP will be either EQ or NE, but we don't need to know which
10276 here. If we are unable to support a dominance comparison we return
10277 CC mode. This will then fail to match for the RTL expressions that
10278 generate this call. */
10279 enum machine_mode
10280 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10281 {
10282 enum rtx_code cond1, cond2;
10283 int swapped = 0;
10284
10285 /* Currently we will probably get the wrong result if the individual
10286 comparisons are not simple. This also ensures that it is safe to
10287 reverse a comparison if necessary. */
10288 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10289 != CCmode)
10290 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10291 != CCmode))
10292 return CCmode;
10293
10294 /* The if_then_else variant of this tests the second condition if the
10295 first passes, but is true if the first fails. Reverse the first
10296 condition to get a true "inclusive-or" expression. */
10297 if (cond_or == DOM_CC_NX_OR_Y)
10298 cond1 = reverse_condition (cond1);
10299
10300 /* If the comparisons are not equal, and one doesn't dominate the other,
10301 then we can't do this. */
10302 if (cond1 != cond2
10303 && !comparison_dominates_p (cond1, cond2)
10304 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10305 return CCmode;
10306
10307 if (swapped)
10308 {
10309 enum rtx_code temp = cond1;
10310 cond1 = cond2;
10311 cond2 = temp;
10312 }
10313
10314 switch (cond1)
10315 {
10316 case EQ:
10317 if (cond_or == DOM_CC_X_AND_Y)
10318 return CC_DEQmode;
10319
10320 switch (cond2)
10321 {
10322 case EQ: return CC_DEQmode;
10323 case LE: return CC_DLEmode;
10324 case LEU: return CC_DLEUmode;
10325 case GE: return CC_DGEmode;
10326 case GEU: return CC_DGEUmode;
10327 default: gcc_unreachable ();
10328 }
10329
10330 case LT:
10331 if (cond_or == DOM_CC_X_AND_Y)
10332 return CC_DLTmode;
10333
10334 switch (cond2)
10335 {
10336 case LT:
10337 return CC_DLTmode;
10338 case LE:
10339 return CC_DLEmode;
10340 case NE:
10341 return CC_DNEmode;
10342 default:
10343 gcc_unreachable ();
10344 }
10345
10346 case GT:
10347 if (cond_or == DOM_CC_X_AND_Y)
10348 return CC_DGTmode;
10349
10350 switch (cond2)
10351 {
10352 case GT:
10353 return CC_DGTmode;
10354 case GE:
10355 return CC_DGEmode;
10356 case NE:
10357 return CC_DNEmode;
10358 default:
10359 gcc_unreachable ();
10360 }
10361
10362 case LTU:
10363 if (cond_or == DOM_CC_X_AND_Y)
10364 return CC_DLTUmode;
10365
10366 switch (cond2)
10367 {
10368 case LTU:
10369 return CC_DLTUmode;
10370 case LEU:
10371 return CC_DLEUmode;
10372 case NE:
10373 return CC_DNEmode;
10374 default:
10375 gcc_unreachable ();
10376 }
10377
10378 case GTU:
10379 if (cond_or == DOM_CC_X_AND_Y)
10380 return CC_DGTUmode;
10381
10382 switch (cond2)
10383 {
10384 case GTU:
10385 return CC_DGTUmode;
10386 case GEU:
10387 return CC_DGEUmode;
10388 case NE:
10389 return CC_DNEmode;
10390 default:
10391 gcc_unreachable ();
10392 }
10393
10394 /* The remaining cases only occur when both comparisons are the
10395 same. */
10396 case NE:
10397 gcc_assert (cond1 == cond2);
10398 return CC_DNEmode;
10399
10400 case LE:
10401 gcc_assert (cond1 == cond2);
10402 return CC_DLEmode;
10403
10404 case GE:
10405 gcc_assert (cond1 == cond2);
10406 return CC_DGEmode;
10407
10408 case LEU:
10409 gcc_assert (cond1 == cond2);
10410 return CC_DLEUmode;
10411
10412 case GEU:
10413 gcc_assert (cond1 == cond2);
10414 return CC_DGEUmode;
10415
10416 default:
10417 gcc_unreachable ();
10418 }
10419 }
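
/* For example (illustrative only), for X == (eq r0 (const_int 0)),
   Y == (ge r1 (const_int 0)) and COND_OR == DOM_CC_X_OR_Y, COND1 is EQ
   and COND2 is GE; EQ dominates GE (if A == B then A >= B), so the
   function returns CC_DGEmode.  */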
10420
10421 enum machine_mode
10422 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10423 {
10424 /* All floating point compares return CCFP if it is an equality
10425 comparison, and CCFPE otherwise. */
10426 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10427 {
10428 switch (op)
10429 {
10430 case EQ:
10431 case NE:
10432 case UNORDERED:
10433 case ORDERED:
10434 case UNLT:
10435 case UNLE:
10436 case UNGT:
10437 case UNGE:
10438 case UNEQ:
10439 case LTGT:
10440 return CCFPmode;
10441
10442 case LT:
10443 case LE:
10444 case GT:
10445 case GE:
10446 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10447 return CCFPmode;
10448 return CCFPEmode;
10449
10450 default:
10451 gcc_unreachable ();
10452 }
10453 }
10454
10455 /* A compare with a shifted operand. Because of canonicalization, the
10456 comparison will have to be swapped when we emit the assembler. */
10457 if (GET_MODE (y) == SImode
10458 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10459 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10460 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10461 || GET_CODE (x) == ROTATERT))
10462 return CC_SWPmode;
10463
10464 /* This operation is performed swapped, but since we only rely on the Z
10465 flag we don't need an additional mode. */
10466 if (GET_MODE (y) == SImode
10467 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10468 && GET_CODE (x) == NEG
10469 && (op == EQ || op == NE))
10470 return CC_Zmode;
10471
10472 /* This is a special case that is used by combine to allow a
10473 comparison of a shifted byte load to be split into a zero-extend
10474 followed by a comparison of the shifted integer (only valid for
10475 equalities and unsigned inequalities). */
10476 if (GET_MODE (x) == SImode
10477 && GET_CODE (x) == ASHIFT
10478 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10479 && GET_CODE (XEXP (x, 0)) == SUBREG
10480 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10481 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10482 && (op == EQ || op == NE
10483 || op == GEU || op == GTU || op == LTU || op == LEU)
10484 && GET_CODE (y) == CONST_INT)
10485 return CC_Zmode;
10486
10487 /* A construct for a conditional compare, if the false arm contains
10488 0, then both conditions must be true, otherwise either condition
10489 must be true. Not all conditions are possible, so CCmode is
10490 returned if it can't be done. */
10491 if (GET_CODE (x) == IF_THEN_ELSE
10492 && (XEXP (x, 2) == const0_rtx
10493 || XEXP (x, 2) == const1_rtx)
10494 && COMPARISON_P (XEXP (x, 0))
10495 && COMPARISON_P (XEXP (x, 1)))
10496 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10497 INTVAL (XEXP (x, 2)));
10498
10499 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10500 if (GET_CODE (x) == AND
10501 && COMPARISON_P (XEXP (x, 0))
10502 && COMPARISON_P (XEXP (x, 1)))
10503 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10504 DOM_CC_X_AND_Y);
10505
10506 if (GET_CODE (x) == IOR
10507 && COMPARISON_P (XEXP (x, 0))
10508 && COMPARISON_P (XEXP (x, 1)))
10509 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10510 DOM_CC_X_OR_Y);
10511
10512 /* An operation (on Thumb) where we want to test for a single bit.
10513 This is done by shifting that bit up into the top bit of a
10514 scratch register; we can then branch on the sign bit. */
10515 if (TARGET_THUMB1
10516 && GET_MODE (x) == SImode
10517 && (op == EQ || op == NE)
10518 && GET_CODE (x) == ZERO_EXTRACT
10519 && XEXP (x, 1) == const1_rtx)
10520 return CC_Nmode;
10521
10522   /* For an operation that sets the condition codes as a side-effect,
10523      the V flag is not set correctly, so we can only use comparisons where
10524 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10525 instead.) */
10526 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10527 if (GET_MODE (x) == SImode
10528 && y == const0_rtx
10529 && (op == EQ || op == NE || op == LT || op == GE)
10530 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10531 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10532 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10533 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10534 || GET_CODE (x) == LSHIFTRT
10535 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10536 || GET_CODE (x) == ROTATERT
10537 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10538 return CC_NOOVmode;
10539
10540 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10541 return CC_Zmode;
10542
10543 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10544 && GET_CODE (x) == PLUS
10545 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10546 return CC_Cmode;
10547
10548 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10549 {
10550 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10551 available. */
10552 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10553 return CCmode;
10554
10555 switch (op)
10556 {
10557 case EQ:
10558 case NE:
10559 /* A DImode comparison against zero can be implemented by
10560 or'ing the two halves together. */
10561 if (y == const0_rtx)
10562 return CC_Zmode;
10563
10564 /* We can do an equality test in three Thumb instructions. */
10565 if (!TARGET_ARM)
10566 return CC_Zmode;
10567
10568 /* FALLTHROUGH */
10569
10570 case LTU:
10571 case LEU:
10572 case GTU:
10573 case GEU:
10574 /* DImode unsigned comparisons can be implemented by cmp +
10575 cmpeq without a scratch register. Not worth doing in
10576 Thumb-2. */
10577 if (TARGET_ARM)
10578 return CC_CZmode;
10579
10580 /* FALLTHROUGH */
10581
10582 case LT:
10583 case LE:
10584 case GT:
10585 case GE:
10586 /* DImode signed and unsigned comparisons can be implemented
10587 by cmp + sbcs with a scratch register, but that does not
10588 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10589 gcc_assert (op != EQ && op != NE);
10590 return CC_NCVmode;
10591
10592 default:
10593 gcc_unreachable ();
10594 }
10595 }
10596
10597 return CCmode;
10598 }
10599
10600 /* X and Y are two things to compare using CODE.  Emit the compare insn
10601    and return the rtx for the condition code register in the proper mode,
10602    as chosen by SELECT_CC_MODE.  */
10603 rtx
10604 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10605 {
10606 enum machine_mode mode;
10607 rtx cc_reg;
10608 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10609
10610 /* We might have X as a constant, Y as a register because of the predicates
10611 used for cmpdi. If so, force X to a register here. */
10612 if (dimode_comparison && !REG_P (x))
10613 x = force_reg (DImode, x);
10614
10615 mode = SELECT_CC_MODE (code, x, y);
10616 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10617
10618 if (dimode_comparison
10619 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10620 && mode != CC_CZmode)
10621 {
10622 rtx clobber, set;
10623
10624 /* To compare two non-zero values for equality, XOR them and
10625 then compare against zero. Not used for ARM mode; there
10626 CC_CZmode is cheaper. */
10627 if (mode == CC_Zmode && y != const0_rtx)
10628 {
10629 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10630 y = const0_rtx;
10631 }
10632 /* A scratch register is required. */
10633 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10634 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10635 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10636 }
10637 else
10638 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10639
10640 return cc_reg;
10641 }
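
/* For instance (illustrative only), an EQ comparison of two DImode
   registers selects CC_Zmode when compiling for Thumb-2; the code above
   then XORs the operands, compares the result against zero and clobbers
   a scratch register, whereas in ARM state CC_CZmode is used and no
   scratch is needed.  */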
10642
10643 /* Generate a sequence of insns that will generate the correct return
10644 address mask depending on the physical architecture that the program
10645 is running on. */
10646 rtx
10647 arm_gen_return_addr_mask (void)
10648 {
10649 rtx reg = gen_reg_rtx (Pmode);
10650
10651 emit_insn (gen_return_addr_mask (reg));
10652 return reg;
10653 }
10654
10655 void
10656 arm_reload_in_hi (rtx *operands)
10657 {
10658 rtx ref = operands[1];
10659 rtx base, scratch;
10660 HOST_WIDE_INT offset = 0;
10661
10662 if (GET_CODE (ref) == SUBREG)
10663 {
10664 offset = SUBREG_BYTE (ref);
10665 ref = SUBREG_REG (ref);
10666 }
10667
10668 if (GET_CODE (ref) == REG)
10669 {
10670 /* We have a pseudo which has been spilt onto the stack; there
10671 are two cases here: the first where there is a simple
10672 stack-slot replacement and a second where the stack-slot is
10673 out of range, or is used as a subreg. */
10674 if (reg_equiv_mem[REGNO (ref)])
10675 {
10676 ref = reg_equiv_mem[REGNO (ref)];
10677 base = find_replacement (&XEXP (ref, 0));
10678 }
10679 else
10680 /* The slot is out of range, or was dressed up in a SUBREG. */
10681 base = reg_equiv_address[REGNO (ref)];
10682 }
10683 else
10684 base = find_replacement (&XEXP (ref, 0));
10685
10686 /* Handle the case where the address is too complex to be offset by 1. */
10687 if (GET_CODE (base) == MINUS
10688 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10689 {
10690 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10691
10692 emit_set_insn (base_plus, base);
10693 base = base_plus;
10694 }
10695 else if (GET_CODE (base) == PLUS)
10696 {
10697 /* The addend must be CONST_INT, or we would have dealt with it above. */
10698 HOST_WIDE_INT hi, lo;
10699
10700 offset += INTVAL (XEXP (base, 1));
10701 base = XEXP (base, 0);
10702
10703 /* Rework the address into a legal sequence of insns. */
10704 /* Valid range for lo is -4095 -> 4095 */
10705 lo = (offset >= 0
10706 ? (offset & 0xfff)
10707 : -((-offset) & 0xfff));
10708
10709 /* Corner case, if lo is the max offset then we would be out of range
10710 once we have added the additional 1 below, so bump the msb into the
10711 pre-loading insn(s). */
10712 if (lo == 4095)
10713 lo &= 0x7ff;
10714
10715 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10716 ^ (HOST_WIDE_INT) 0x80000000)
10717 - (HOST_WIDE_INT) 0x80000000);
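
      /* For example, an offset of 0x1234 splits into lo == 0x234 and
	 hi == 0x1000; the hi part is added to the base register below and
	 the lo part is left as the immediate offset.  */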
10718
10719 gcc_assert (hi + lo == offset);
10720
10721 if (hi != 0)
10722 {
10723 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10724
10725 /* Get the base address; addsi3 knows how to handle constants
10726 that require more than one insn. */
10727 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10728 base = base_plus;
10729 offset = lo;
10730 }
10731 }
10732
10733 /* Operands[2] may overlap operands[0] (though it won't overlap
10734 operands[1]), that's why we asked for a DImode reg -- so we can
10735 use the bit that does not overlap. */
10736 if (REGNO (operands[2]) == REGNO (operands[0]))
10737 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10738 else
10739 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10740
10741 emit_insn (gen_zero_extendqisi2 (scratch,
10742 gen_rtx_MEM (QImode,
10743 plus_constant (base,
10744 offset))));
10745 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10746 gen_rtx_MEM (QImode,
10747 plus_constant (base,
10748 offset + 1))));
10749 if (!BYTES_BIG_ENDIAN)
10750 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10751 gen_rtx_IOR (SImode,
10752 gen_rtx_ASHIFT
10753 (SImode,
10754 gen_rtx_SUBREG (SImode, operands[0], 0),
10755 GEN_INT (8)),
10756 scratch));
10757 else
10758 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10759 gen_rtx_IOR (SImode,
10760 gen_rtx_ASHIFT (SImode, scratch,
10761 GEN_INT (8)),
10762 gen_rtx_SUBREG (SImode, operands[0], 0)));
10763 }
10764
10765 /* Handle storing a half-word to memory during reload by synthesizing as two
10766 byte stores. Take care not to clobber the input values until after we
10767 have moved them somewhere safe. This code assumes that if the DImode
10768 scratch in operands[2] overlaps either the input value or output address
10769 in some way, then that value must die in this insn (we absolutely need
10770 two scratch registers for some corner cases). */
10771 void
10772 arm_reload_out_hi (rtx *operands)
10773 {
10774 rtx ref = operands[0];
10775 rtx outval = operands[1];
10776 rtx base, scratch;
10777 HOST_WIDE_INT offset = 0;
10778
10779 if (GET_CODE (ref) == SUBREG)
10780 {
10781 offset = SUBREG_BYTE (ref);
10782 ref = SUBREG_REG (ref);
10783 }
10784
10785 if (GET_CODE (ref) == REG)
10786 {
10787 /* We have a pseudo which has been spilt onto the stack; there
10788 are two cases here: the first where there is a simple
10789 stack-slot replacement and a second where the stack-slot is
10790 out of range, or is used as a subreg. */
10791 if (reg_equiv_mem[REGNO (ref)])
10792 {
10793 ref = reg_equiv_mem[REGNO (ref)];
10794 base = find_replacement (&XEXP (ref, 0));
10795 }
10796 else
10797 /* The slot is out of range, or was dressed up in a SUBREG. */
10798 base = reg_equiv_address[REGNO (ref)];
10799 }
10800 else
10801 base = find_replacement (&XEXP (ref, 0));
10802
10803 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10804
10805 /* Handle the case where the address is too complex to be offset by 1. */
10806 if (GET_CODE (base) == MINUS
10807 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10808 {
10809 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10810
10811 /* Be careful not to destroy OUTVAL. */
10812 if (reg_overlap_mentioned_p (base_plus, outval))
10813 {
10814 /* Updating base_plus might destroy outval, see if we can
10815 swap the scratch and base_plus. */
10816 if (!reg_overlap_mentioned_p (scratch, outval))
10817 {
10818 rtx tmp = scratch;
10819 scratch = base_plus;
10820 base_plus = tmp;
10821 }
10822 else
10823 {
10824 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10825
10826 /* Be conservative and copy OUTVAL into the scratch now,
10827 this should only be necessary if outval is a subreg
10828 of something larger than a word. */
10829 /* XXX Might this clobber base? I can't see how it can,
10830 since scratch is known to overlap with OUTVAL, and
10831 must be wider than a word. */
10832 emit_insn (gen_movhi (scratch_hi, outval));
10833 outval = scratch_hi;
10834 }
10835 }
10836
10837 emit_set_insn (base_plus, base);
10838 base = base_plus;
10839 }
10840 else if (GET_CODE (base) == PLUS)
10841 {
10842 /* The addend must be CONST_INT, or we would have dealt with it above. */
10843 HOST_WIDE_INT hi, lo;
10844
10845 offset += INTVAL (XEXP (base, 1));
10846 base = XEXP (base, 0);
10847
10848 /* Rework the address into a legal sequence of insns. */
10849 /* Valid range for lo is -4095 -> 4095 */
10850 lo = (offset >= 0
10851 ? (offset & 0xfff)
10852 : -((-offset) & 0xfff));
10853
10854 /* Corner case, if lo is the max offset then we would be out of range
10855 once we have added the additional 1 below, so bump the msb into the
10856 pre-loading insn(s). */
10857 if (lo == 4095)
10858 lo &= 0x7ff;
10859
10860 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10861 ^ (HOST_WIDE_INT) 0x80000000)
10862 - (HOST_WIDE_INT) 0x80000000);
10863
10864 gcc_assert (hi + lo == offset);
10865
10866 if (hi != 0)
10867 {
10868 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10869
10870 /* Be careful not to destroy OUTVAL. */
10871 if (reg_overlap_mentioned_p (base_plus, outval))
10872 {
10873 /* Updating base_plus might destroy outval, see if we
10874 can swap the scratch and base_plus. */
10875 if (!reg_overlap_mentioned_p (scratch, outval))
10876 {
10877 rtx tmp = scratch;
10878 scratch = base_plus;
10879 base_plus = tmp;
10880 }
10881 else
10882 {
10883 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10884
10885 /* Be conservative and copy outval into scratch now,
10886 this should only be necessary if outval is a
10887 subreg of something larger than a word. */
10888 /* XXX Might this clobber base? I can't see how it
10889 can, since scratch is known to overlap with
10890 outval. */
10891 emit_insn (gen_movhi (scratch_hi, outval));
10892 outval = scratch_hi;
10893 }
10894 }
10895
10896 /* Get the base address; addsi3 knows how to handle constants
10897 that require more than one insn. */
10898 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10899 base = base_plus;
10900 offset = lo;
10901 }
10902 }
10903
10904 if (BYTES_BIG_ENDIAN)
10905 {
10906 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10907 plus_constant (base, offset + 1)),
10908 gen_lowpart (QImode, outval)));
10909 emit_insn (gen_lshrsi3 (scratch,
10910 gen_rtx_SUBREG (SImode, outval, 0),
10911 GEN_INT (8)));
10912 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10913 gen_lowpart (QImode, scratch)));
10914 }
10915 else
10916 {
10917 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10918 gen_lowpart (QImode, outval)));
10919 emit_insn (gen_lshrsi3 (scratch,
10920 gen_rtx_SUBREG (SImode, outval, 0),
10921 GEN_INT (8)));
10922 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10923 plus_constant (base, offset + 1)),
10924 gen_lowpart (QImode, scratch)));
10925 }
10926 }
10927
10928 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10929 (padded to the size of a word) should be passed in a register. */
10930
10931 static bool
10932 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10933 {
10934 if (TARGET_AAPCS_BASED)
10935 return must_pass_in_stack_var_size (mode, type);
10936 else
10937 return must_pass_in_stack_var_size_or_pad (mode, type);
10938 }
10939
10940
10941 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10942 Return true if an argument passed on the stack should be padded upwards,
10943 i.e. if the least-significant byte has useful data.
10944 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10945 aggregate types are placed in the lowest memory address. */
10946
10947 bool
10948 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10949 {
10950 if (!TARGET_AAPCS_BASED)
10951     return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
10952
10953 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10954 return false;
10955
10956 return true;
10957 }
10958
10959
10960 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10961 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10962 byte of the register has useful data, and return the opposite if the
10963 most significant byte does.
10964 For AAPCS, small aggregates and small complex types are always padded
10965 upwards. */
10966
10967 bool
10968 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10969 tree type, int first ATTRIBUTE_UNUSED)
10970 {
10971 if (TARGET_AAPCS_BASED
10972 && BYTES_BIG_ENDIAN
10973 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10974 && int_size_in_bytes (type) <= 4)
10975 return true;
10976
10977 /* Otherwise, use default padding. */
10978 return !BYTES_BIG_ENDIAN;
10979 }
10980
10981 \f
10982 /* Print a symbolic form of X to the debug file, F. */
10983 static void
10984 arm_print_value (FILE *f, rtx x)
10985 {
10986 switch (GET_CODE (x))
10987 {
10988 case CONST_INT:
10989 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10990 return;
10991
10992 case CONST_DOUBLE:
10993 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10994 return;
10995
10996 case CONST_VECTOR:
10997 {
10998 int i;
10999
11000 fprintf (f, "<");
11001 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11002 {
11003 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11004 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11005 fputc (',', f);
11006 }
11007 fprintf (f, ">");
11008 }
11009 return;
11010
11011 case CONST_STRING:
11012 fprintf (f, "\"%s\"", XSTR (x, 0));
11013 return;
11014
11015 case SYMBOL_REF:
11016 fprintf (f, "`%s'", XSTR (x, 0));
11017 return;
11018
11019 case LABEL_REF:
11020 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11021 return;
11022
11023 case CONST:
11024 arm_print_value (f, XEXP (x, 0));
11025 return;
11026
11027 case PLUS:
11028 arm_print_value (f, XEXP (x, 0));
11029 fprintf (f, "+");
11030 arm_print_value (f, XEXP (x, 1));
11031 return;
11032
11033 case PC:
11034 fprintf (f, "pc");
11035 return;
11036
11037 default:
11038 fprintf (f, "????");
11039 return;
11040 }
11041 }
11042 \f
11043 /* Routines for manipulation of the constant pool. */
11044
11045 /* Arm instructions cannot load a large constant directly into a
11046 register; they have to come from a pc relative load. The constant
11047 must therefore be placed in the addressable range of the pc
11048 relative load. Depending on the precise pc relative load
11049 instruction the range is somewhere between 256 bytes and 4k. This
11050 means that we often have to dump a constant inside a function, and
11051 generate code to branch around it.
11052
11053 It is important to minimize this, since the branches will slow
11054 things down and make the code larger.
11055
11056 Normally we can hide the table after an existing unconditional
11057 branch so that there is no interruption of the flow, but in the
11058 worst case the code looks like this:
11059
11060 ldr rn, L1
11061 ...
11062 b L2
11063 align
11064 L1: .long value
11065 L2:
11066 ...
11067
11068 ldr rn, L3
11069 ...
11070 b L4
11071 align
11072 L3: .long value
11073 L4:
11074 ...
11075
11076 We fix this by performing a scan after scheduling, which notices
11077 which instructions need to have their operands fetched from the
11078 constant table and builds the table.
11079
11080 The algorithm starts by building a table of all the constants that
11081 need fixing up and all the natural barriers in the function (places
11082 where a constant table can be dropped without breaking the flow).
11083 For each fixup we note how far the pc-relative replacement will be
11084 able to reach and the offset of the instruction into the function.
11085
11086 Having built the table we then group the fixes together to form
11087 tables that are as large as possible (subject to addressing
11088 constraints) and emit each table of constants after the last
11089 barrier that is within range of all the instructions in the group.
11090 If a group does not contain a barrier, then we forcibly create one
11091 by inserting a jump instruction into the flow. Once the table has
11092 been inserted, the insns are then modified to reference the
11093 relevant entry in the pool.
11094
11095 Possible enhancements to the algorithm (not implemented) are:
11096
11097 1) For some processors and object formats, there may be benefit in
11098 aligning the pools to the start of cache lines; this alignment
11099 would need to be taken into account when calculating addressability
11100 of a pool. */
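
/* A sketch with made-up numbers: a fix at offset 0x100 into the
   function whose pc-relative load can reach 4K forwards constrains its
   pool entry to lie no further than roughly 0x100 + 4096.  If no
   natural barrier exists below that limit, create_fix_barrier inserts
   a branch-around jump so the pool can be dumped in range.  */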
11101
11102 /* These typedefs are located at the start of this file, so that
11103 they can be used in the prototypes there. This comment is to
11104 remind readers of that fact so that the following structures
11105 can be understood more easily.
11106
11107 typedef struct minipool_node Mnode;
11108 typedef struct minipool_fixup Mfix; */
11109
11110 struct minipool_node
11111 {
11112 /* Doubly linked chain of entries. */
11113 Mnode * next;
11114 Mnode * prev;
11115 /* The maximum offset into the code that this entry can be placed. While
11116 pushing fixes for forward references, all entries are sorted in order
11117 of increasing max_address. */
11118 HOST_WIDE_INT max_address;
11119 /* Similarly for an entry inserted for a backwards ref. */
11120 HOST_WIDE_INT min_address;
11121 /* The number of fixes referencing this entry. This can become zero
11122 if we "unpush" an entry. In this case we ignore the entry when we
11123 come to emit the code. */
11124 int refcount;
11125 /* The offset from the start of the minipool. */
11126 HOST_WIDE_INT offset;
11127 /* The value in the table. */
11128 rtx value;
11129 /* The mode of value. */
11130 enum machine_mode mode;
11131 /* The size of the value. With iWMMXt enabled
11132 sizes > 4 also imply an alignment of 8-bytes. */
11133 int fix_size;
11134 };
11135
11136 struct minipool_fixup
11137 {
11138 Mfix * next;
11139 rtx insn;
11140 HOST_WIDE_INT address;
11141 rtx * loc;
11142 enum machine_mode mode;
11143 int fix_size;
11144 rtx value;
11145 Mnode * minipool;
11146 HOST_WIDE_INT forwards;
11147 HOST_WIDE_INT backwards;
11148 };
11149
11150 /* Fixes less than a word need padding out to a word boundary. */
11151 #define MINIPOOL_FIX_SIZE(mode) \
11152 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
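
/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte value padded
   to a word), while MINIPOOL_FIX_SIZE (DImode) is 8.  */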
11153
11154 static Mnode * minipool_vector_head;
11155 static Mnode * minipool_vector_tail;
11156 static rtx minipool_vector_label;
11157 static int minipool_pad;
11158
11159 /* The linked list of all minipool fixes required for this function. */
11160 Mfix * minipool_fix_head;
11161 Mfix * minipool_fix_tail;
11162 /* The fix entry for the current minipool, once it has been placed. */
11163 Mfix * minipool_barrier;
11164
11165 /* Determines if INSN is the start of a jump table. Returns the end
11166 of the TABLE or NULL_RTX. */
11167 static rtx
11168 is_jump_table (rtx insn)
11169 {
11170 rtx table;
11171
11172 if (GET_CODE (insn) == JUMP_INSN
11173 && JUMP_LABEL (insn) != NULL
11174 && ((table = next_real_insn (JUMP_LABEL (insn)))
11175 == next_real_insn (insn))
11176 && table != NULL
11177 && GET_CODE (table) == JUMP_INSN
11178 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11179 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11180 return table;
11181
11182 return NULL_RTX;
11183 }
11184
11185 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11186 #define JUMP_TABLES_IN_TEXT_SECTION 0
11187 #endif
11188
11189 static HOST_WIDE_INT
11190 get_jump_table_size (rtx insn)
11191 {
11192 /* ADDR_VECs only take room if read-only data goes into the text
11193 section. */
11194 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11195 {
11196 rtx body = PATTERN (insn);
11197 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11198 HOST_WIDE_INT size;
11199 HOST_WIDE_INT modesize;
11200
11201 modesize = GET_MODE_SIZE (GET_MODE (body));
11202 size = modesize * XVECLEN (body, elt);
11203 switch (modesize)
11204 {
11205 case 1:
11206 /* Round up size of TBB table to a halfword boundary. */
11207 size = (size + 1) & ~(HOST_WIDE_INT)1;
11208 break;
11209 case 2:
11210 /* No padding necessary for TBH. */
11211 break;
11212 case 4:
11213 /* Add two bytes for alignment on Thumb. */
11214 if (TARGET_THUMB)
11215 size += 2;
11216 break;
11217 default:
11218 gcc_unreachable ();
11219 }
11220 return size;
11221 }
11222
11223 return 0;
11224 }
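
/* Worked example (illustrative values): an ADDR_DIFF_VEC in QImode
   with 5 entries (a TBB table) gives 5 bytes, rounded up to 6 for the
   halfword boundary; an SImode ADDR_VEC with 3 entries on Thumb gives
   12 + 2 = 14 bytes because of the extra alignment padding.  */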
11225
11226 /* Move a minipool fix MP from its current location to before MAX_MP.
11227 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11228 constraints may need updating. */
11229 static Mnode *
11230 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11231 HOST_WIDE_INT max_address)
11232 {
11233 /* The code below assumes these are different. */
11234 gcc_assert (mp != max_mp);
11235
11236 if (max_mp == NULL)
11237 {
11238 if (max_address < mp->max_address)
11239 mp->max_address = max_address;
11240 }
11241 else
11242 {
11243 if (max_address > max_mp->max_address - mp->fix_size)
11244 mp->max_address = max_mp->max_address - mp->fix_size;
11245 else
11246 mp->max_address = max_address;
11247
11248 /* Unlink MP from its current position. Since max_mp is non-null,
11249 mp->prev must be non-null. */
11250 mp->prev->next = mp->next;
11251 if (mp->next != NULL)
11252 mp->next->prev = mp->prev;
11253 else
11254 minipool_vector_tail = mp->prev;
11255
11256 /* Re-insert it before MAX_MP. */
11257 mp->next = max_mp;
11258 mp->prev = max_mp->prev;
11259 max_mp->prev = mp;
11260
11261 if (mp->prev != NULL)
11262 mp->prev->next = mp;
11263 else
11264 minipool_vector_head = mp;
11265 }
11266
11267 /* Save the new entry. */
11268 max_mp = mp;
11269
11270 /* Scan over the preceding entries and adjust their addresses as
11271 required. */
11272 while (mp->prev != NULL
11273 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11274 {
11275 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11276 mp = mp->prev;
11277 }
11278
11279 return max_mp;
11280 }
11281
11282 /* Add a constant to the minipool for a forward reference. Returns the
11283 node added or NULL if the constant will not fit in this pool. */
11284 static Mnode *
11285 add_minipool_forward_ref (Mfix *fix)
11286 {
11287 /* If set, max_mp is the first pool_entry that has a lower
11288 constraint than the one we are trying to add. */
11289 Mnode * max_mp = NULL;
11290 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11291 Mnode * mp;
11292
11293 /* If the minipool starts before the end of FIX->INSN then this FIX
11294 cannot be placed into the current pool. Furthermore, adding the
11295 new constant pool entry may cause the pool to start FIX_SIZE bytes
11296 earlier. */
11297 if (minipool_vector_head &&
11298 (fix->address + get_attr_length (fix->insn)
11299 >= minipool_vector_head->max_address - fix->fix_size))
11300 return NULL;
11301
11302 /* Scan the pool to see if a constant with the same value has
11303 already been added. While we are doing this, also note the
11304 location where we must insert the constant if it doesn't already
11305 exist. */
11306 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11307 {
11308 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11309 && fix->mode == mp->mode
11310 && (GET_CODE (fix->value) != CODE_LABEL
11311 || (CODE_LABEL_NUMBER (fix->value)
11312 == CODE_LABEL_NUMBER (mp->value)))
11313 && rtx_equal_p (fix->value, mp->value))
11314 {
11315 /* More than one fix references this entry. */
11316 mp->refcount++;
11317 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11318 }
11319
11320 /* Note the insertion point if necessary. */
11321 if (max_mp == NULL
11322 && mp->max_address > max_address)
11323 max_mp = mp;
11324
11325 /* If we are inserting an 8-byte aligned quantity and
11326 we have not already found an insertion point, then
11327 make sure that all such 8-byte aligned quantities are
11328 placed at the start of the pool. */
11329 if (ARM_DOUBLEWORD_ALIGN
11330 && max_mp == NULL
11331 && fix->fix_size >= 8
11332 && mp->fix_size < 8)
11333 {
11334 max_mp = mp;
11335 max_address = mp->max_address;
11336 }
11337 }
11338
11339 /* The value is not currently in the minipool, so we need to create
11340 a new entry for it. If MAX_MP is NULL, the entry will be put on
11341 the end of the list since the placement is less constrained than
11342 any existing entry. Otherwise, we insert the new fix before
11343 MAX_MP and, if necessary, adjust the constraints on the other
11344 entries. */
11345 mp = XNEW (Mnode);
11346 mp->fix_size = fix->fix_size;
11347 mp->mode = fix->mode;
11348 mp->value = fix->value;
11349 mp->refcount = 1;
11350 /* Not yet required for a backwards ref. */
11351 mp->min_address = -65536;
11352
11353 if (max_mp == NULL)
11354 {
11355 mp->max_address = max_address;
11356 mp->next = NULL;
11357 mp->prev = minipool_vector_tail;
11358
11359 if (mp->prev == NULL)
11360 {
11361 minipool_vector_head = mp;
11362 minipool_vector_label = gen_label_rtx ();
11363 }
11364 else
11365 mp->prev->next = mp;
11366
11367 minipool_vector_tail = mp;
11368 }
11369 else
11370 {
11371 if (max_address > max_mp->max_address - mp->fix_size)
11372 mp->max_address = max_mp->max_address - mp->fix_size;
11373 else
11374 mp->max_address = max_address;
11375
11376 mp->next = max_mp;
11377 mp->prev = max_mp->prev;
11378 max_mp->prev = mp;
11379 if (mp->prev != NULL)
11380 mp->prev->next = mp;
11381 else
11382 minipool_vector_head = mp;
11383 }
11384
11385 /* Save the new entry. */
11386 max_mp = mp;
11387
11388 /* Scan over the preceding entries and adjust their addresses as
11389 required. */
11390 while (mp->prev != NULL
11391 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11392 {
11393 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11394 mp = mp->prev;
11395 }
11396
11397 return max_mp;
11398 }
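
/* Illustrative example (made-up numbers): a fix at address 0x400 with
   a forward pool range of 4096 and minipool_pad of 4 gets
   max_address = 0x400 + 4096 - 4 = 0x13FC.  If an rtx_equal_p entry of
   the same mode already exists, only its refcount is bumped and it may
   be moved earlier in the pool to satisfy the tighter constraint.  */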
11399
11400 static Mnode *
11401 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11402 HOST_WIDE_INT min_address)
11403 {
11404 HOST_WIDE_INT offset;
11405
11406 /* The code below assumes these are different. */
11407 gcc_assert (mp != min_mp);
11408
11409 if (min_mp == NULL)
11410 {
11411 if (min_address > mp->min_address)
11412 mp->min_address = min_address;
11413 }
11414 else
11415 {
11416 /* We will adjust this below if it is too loose. */
11417 mp->min_address = min_address;
11418
11419 /* Unlink MP from its current position. Since min_mp is non-null,
11420 mp->next must be non-null. */
11421 mp->next->prev = mp->prev;
11422 if (mp->prev != NULL)
11423 mp->prev->next = mp->next;
11424 else
11425 minipool_vector_head = mp->next;
11426
11427 /* Reinsert it after MIN_MP. */
11428 mp->prev = min_mp;
11429 mp->next = min_mp->next;
11430 min_mp->next = mp;
11431 if (mp->next != NULL)
11432 mp->next->prev = mp;
11433 else
11434 minipool_vector_tail = mp;
11435 }
11436
11437 min_mp = mp;
11438
11439 offset = 0;
11440 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11441 {
11442 mp->offset = offset;
11443 if (mp->refcount > 0)
11444 offset += mp->fix_size;
11445
11446 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11447 mp->next->min_address = mp->min_address + mp->fix_size;
11448 }
11449
11450 return min_mp;
11451 }
11452
11453 /* Add a constant to the minipool for a backward reference. Returns the
11454 node added or NULL if the constant will not fit in this pool.
11455
11456 Note that the code for insertion for a backwards reference can be
11457 somewhat confusing because the calculated offsets for each fix do
11458 not take into account the size of the pool (which is still under
11459 construction). */
11460 static Mnode *
11461 add_minipool_backward_ref (Mfix *fix)
11462 {
11463 /* If set, min_mp is the last pool_entry that has a lower constraint
11464 than the one we are trying to add. */
11465 Mnode *min_mp = NULL;
11466 /* This can be negative, since it is only a constraint. */
11467 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11468 Mnode *mp;
11469
11470 /* If we can't reach the current pool from this insn, or if we can't
11471 insert this entry at the end of the pool without pushing other
11472 fixes out of range, then we don't try. This ensures that we
11473 can't fail later on. */
11474 if (min_address >= minipool_barrier->address
11475 || (minipool_vector_tail->min_address + fix->fix_size
11476 >= minipool_barrier->address))
11477 return NULL;
11478
11479 /* Scan the pool to see if a constant with the same value has
11480 already been added. While we are doing this, also note the
11481 location where we must insert the constant if it doesn't already
11482 exist. */
11483 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11484 {
11485 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11486 && fix->mode == mp->mode
11487 && (GET_CODE (fix->value) != CODE_LABEL
11488 || (CODE_LABEL_NUMBER (fix->value)
11489 == CODE_LABEL_NUMBER (mp->value)))
11490 && rtx_equal_p (fix->value, mp->value)
11491 /* Check that there is enough slack to move this entry to the
11492 end of the table (this is conservative). */
11493 && (mp->max_address
11494 > (minipool_barrier->address
11495 + minipool_vector_tail->offset
11496 + minipool_vector_tail->fix_size)))
11497 {
11498 mp->refcount++;
11499 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11500 }
11501
11502 if (min_mp != NULL)
11503 mp->min_address += fix->fix_size;
11504 else
11505 {
11506 /* Note the insertion point if necessary. */
11507 if (mp->min_address < min_address)
11508 {
11509 /* For now, we do not allow the insertion of nodes requiring
11510 8-byte alignment anywhere but at the start of the pool. */
11511 if (ARM_DOUBLEWORD_ALIGN
11512 && fix->fix_size >= 8 && mp->fix_size < 8)
11513 return NULL;
11514 else
11515 min_mp = mp;
11516 }
11517 else if (mp->max_address
11518 < minipool_barrier->address + mp->offset + fix->fix_size)
11519 {
11520 /* Inserting before this entry would push the fix beyond
11521 its maximum address (which can happen if we have
11522 re-located a forwards fix); force the new fix to come
11523 after it. */
11524 if (ARM_DOUBLEWORD_ALIGN
11525 && fix->fix_size >= 8 && mp->fix_size < 8)
11526 return NULL;
11527 else
11528 {
11529 min_mp = mp;
11530 min_address = mp->min_address + fix->fix_size;
11531 }
11532 }
11533 /* Do not insert a non-8-byte aligned quantity before 8-byte
11534 aligned quantities. */
11535 else if (ARM_DOUBLEWORD_ALIGN
11536 && fix->fix_size < 8
11537 && mp->fix_size >= 8)
11538 {
11539 min_mp = mp;
11540 min_address = mp->min_address + fix->fix_size;
11541 }
11542 }
11543 }
11544
11545 /* We need to create a new entry. */
11546 mp = XNEW (Mnode);
11547 mp->fix_size = fix->fix_size;
11548 mp->mode = fix->mode;
11549 mp->value = fix->value;
11550 mp->refcount = 1;
11551 mp->max_address = minipool_barrier->address + 65536;
11552
11553 mp->min_address = min_address;
11554
11555 if (min_mp == NULL)
11556 {
11557 mp->prev = NULL;
11558 mp->next = minipool_vector_head;
11559
11560 if (mp->next == NULL)
11561 {
11562 minipool_vector_tail = mp;
11563 minipool_vector_label = gen_label_rtx ();
11564 }
11565 else
11566 mp->next->prev = mp;
11567
11568 minipool_vector_head = mp;
11569 }
11570 else
11571 {
11572 mp->next = min_mp->next;
11573 mp->prev = min_mp;
11574 min_mp->next = mp;
11575
11576 if (mp->next != NULL)
11577 mp->next->prev = mp;
11578 else
11579 minipool_vector_tail = mp;
11580 }
11581
11582 /* Save the new entry. */
11583 min_mp = mp;
11584
11585 if (mp->prev)
11586 mp = mp->prev;
11587 else
11588 mp->offset = 0;
11589
11590 /* Scan over the following entries and adjust their offsets. */
11591 while (mp->next != NULL)
11592 {
11593 if (mp->next->min_address < mp->min_address + mp->fix_size)
11594 mp->next->min_address = mp->min_address + mp->fix_size;
11595
11596 if (mp->refcount)
11597 mp->next->offset = mp->offset + mp->fix_size;
11598 else
11599 mp->next->offset = mp->offset;
11600
11601 mp = mp->next;
11602 }
11603
11604 return min_mp;
11605 }
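
/* Illustrative example (made-up numbers): a fix at address 0x800 with
   a backward range of 1020 gets min_address = 0x800 - 1020 = 0x404;
   its pool entry must end up at an address no lower than that, so if
   the current pool (at minipool_barrier->address) lies entirely below
   0x404 the function returns NULL and the fix is left for a later pool.  */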
11606
11607 static void
11608 assign_minipool_offsets (Mfix *barrier)
11609 {
11610 HOST_WIDE_INT offset = 0;
11611 Mnode *mp;
11612
11613 minipool_barrier = barrier;
11614
11615 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11616 {
11617 mp->offset = offset;
11618
11619 if (mp->refcount > 0)
11620 offset += mp->fix_size;
11621 }
11622 }
11623
11624 /* Output the literal table. */
11625 static void
11626 dump_minipool (rtx scan)
11627 {
11628 Mnode * mp;
11629 Mnode * nmp;
11630 int align64 = 0;
11631
11632 if (ARM_DOUBLEWORD_ALIGN)
11633 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11634 if (mp->refcount > 0 && mp->fix_size >= 8)
11635 {
11636 align64 = 1;
11637 break;
11638 }
11639
11640 if (dump_file)
11641 fprintf (dump_file,
11642 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11643 INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
11644
11645 scan = emit_label_after (gen_label_rtx (), scan);
11646 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11647 scan = emit_label_after (minipool_vector_label, scan);
11648
11649 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11650 {
11651 if (mp->refcount > 0)
11652 {
11653 if (dump_file)
11654 {
11655 fprintf (dump_file,
11656 ";; Offset %u, min %ld, max %ld ",
11657 (unsigned) mp->offset, (long) mp->min_address,
11658 (long) mp->max_address);
11659 arm_print_value (dump_file, mp->value);
11660 fputc ('\n', dump_file);
11661 }
11662
11663 switch (mp->fix_size)
11664 {
11665 #ifdef HAVE_consttable_1
11666 case 1:
11667 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11668 break;
11669
11670 #endif
11671 #ifdef HAVE_consttable_2
11672 case 2:
11673 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11674 break;
11675
11676 #endif
11677 #ifdef HAVE_consttable_4
11678 case 4:
11679 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11680 break;
11681
11682 #endif
11683 #ifdef HAVE_consttable_8
11684 case 8:
11685 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11686 break;
11687
11688 #endif
11689 #ifdef HAVE_consttable_16
11690 case 16:
11691 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11692 break;
11693
11694 #endif
11695 default:
11696 gcc_unreachable ();
11697 }
11698 }
11699
11700 nmp = mp->next;
11701 free (mp);
11702 }
11703
11704 minipool_vector_head = minipool_vector_tail = NULL;
11705 scan = emit_insn_after (gen_consttable_end (), scan);
11706 scan = emit_barrier_after (scan);
11707 }
11708
11709 /* Return the cost of forcibly inserting a barrier after INSN. */
11710 static int
11711 arm_barrier_cost (rtx insn)
11712 {
11713 /* Basing the location of the pool on the loop depth is preferable,
11714 but at the moment, the basic block information seems to be
11715 corrupt by this stage of the compilation. */
11716 int base_cost = 50;
11717 rtx next = next_nonnote_insn (insn);
11718
11719 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11720 base_cost -= 20;
11721
11722 switch (GET_CODE (insn))
11723 {
11724 case CODE_LABEL:
11725 /* It will always be better to place the table before the label, rather
11726 than after it. */
11727 return 50;
11728
11729 case INSN:
11730 case CALL_INSN:
11731 return base_cost;
11732
11733 case JUMP_INSN:
11734 return base_cost - 10;
11735
11736 default:
11737 return base_cost + 10;
11738 }
11739 }
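
/* For example (lower cost means a better site): an ordinary INSN costs
   50, or 30 when the next non-note insn is a CODE_LABEL; a JUMP_INSN in
   the same positions costs 40 or 20; a CODE_LABEL itself always costs
   50, since the pool is better placed before the label.  */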
11740
11741 /* Find the best place in the insn stream in the range
11742 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11743 Create the barrier by inserting a jump and add a new fix entry for
11744 it. */
11745 static Mfix *
11746 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11747 {
11748 HOST_WIDE_INT count = 0;
11749 rtx barrier;
11750 rtx from = fix->insn;
11751 /* The instruction after which we will insert the jump. */
11752 rtx selected = NULL;
11753 int selected_cost;
11754 /* The address at which the jump instruction will be placed. */
11755 HOST_WIDE_INT selected_address;
11756 Mfix * new_fix;
11757 HOST_WIDE_INT max_count = max_address - fix->address;
11758 rtx label = gen_label_rtx ();
11759
11760 selected_cost = arm_barrier_cost (from);
11761 selected_address = fix->address;
11762
11763 while (from && count < max_count)
11764 {
11765 rtx tmp;
11766 int new_cost;
11767
11768 /* This code shouldn't have been called if there was a natural barrier
11769 within range. */
11770 gcc_assert (GET_CODE (from) != BARRIER);
11771
11772 /* Count the length of this insn. */
11773 count += get_attr_length (from);
11774
11775 /* If there is a jump table, add its length. */
11776 tmp = is_jump_table (from);
11777 if (tmp != NULL)
11778 {
11779 count += get_jump_table_size (tmp);
11780
11781 /* Jump tables aren't in a basic block, so base the cost on
11782 the dispatch insn. If we select this location, we will
11783 still put the pool after the table. */
11784 new_cost = arm_barrier_cost (from);
11785
11786 if (count < max_count
11787 && (!selected || new_cost <= selected_cost))
11788 {
11789 selected = tmp;
11790 selected_cost = new_cost;
11791 selected_address = fix->address + count;
11792 }
11793
11794 /* Continue after the dispatch table. */
11795 from = NEXT_INSN (tmp);
11796 continue;
11797 }
11798
11799 new_cost = arm_barrier_cost (from);
11800
11801 if (count < max_count
11802 && (!selected || new_cost <= selected_cost))
11803 {
11804 selected = from;
11805 selected_cost = new_cost;
11806 selected_address = fix->address + count;
11807 }
11808
11809 from = NEXT_INSN (from);
11810 }
11811
11812 /* Make sure that we found a place to insert the jump. */
11813 gcc_assert (selected);
11814
11815 /* Create a new JUMP_INSN that branches around a barrier. */
11816 from = emit_jump_insn_after (gen_jump (label), selected);
11817 JUMP_LABEL (from) = label;
11818 barrier = emit_barrier_after (from);
11819 emit_label_after (label, barrier);
11820
11821 /* Create a minipool barrier entry for the new barrier. */
11822 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11823 new_fix->insn = barrier;
11824 new_fix->address = selected_address;
11825 new_fix->next = fix->next;
11826 fix->next = new_fix;
11827
11828 return new_fix;
11829 }
11830
11831 /* Record that there is a natural barrier in the insn stream at
11832 ADDRESS. */
11833 static void
11834 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11835 {
11836 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11837
11838 fix->insn = insn;
11839 fix->address = address;
11840
11841 fix->next = NULL;
11842 if (minipool_fix_head != NULL)
11843 minipool_fix_tail->next = fix;
11844 else
11845 minipool_fix_head = fix;
11846
11847 minipool_fix_tail = fix;
11848 }
11849
11850 /* Record INSN, which will need fixing up to load a value from the
11851 minipool. ADDRESS is the offset of the insn since the start of the
11852 function; LOC is a pointer to the part of the insn which requires
11853 fixing; VALUE is the constant that must be loaded, which is of type
11854 MODE. */
11855 static void
11856 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11857 enum machine_mode mode, rtx value)
11858 {
11859 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11860
11861 fix->insn = insn;
11862 fix->address = address;
11863 fix->loc = loc;
11864 fix->mode = mode;
11865 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11866 fix->value = value;
11867 fix->forwards = get_attr_pool_range (insn);
11868 fix->backwards = get_attr_neg_pool_range (insn);
11869 fix->minipool = NULL;
11870
11871 /* If an insn doesn't have a range defined for it, then it isn't
11872 expecting to be reworked by this code. Better to stop now than
11873 to generate duff assembly code. */
11874 gcc_assert (fix->forwards || fix->backwards);
11875
11876 /* If an entry requires 8-byte alignment then assume all constant pools
11877 require 4 bytes of padding. Trying to do this later on a per-pool
11878 basis is awkward because existing pool entries have to be modified. */
11879 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11880 minipool_pad = 4;
11881
11882 if (dump_file)
11883 {
11884 fprintf (dump_file,
11885 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11886 GET_MODE_NAME (mode),
11887 INSN_UID (insn), (unsigned long) address,
11888 -1 * (long)fix->backwards, (long)fix->forwards);
11889 arm_print_value (dump_file, fix->value);
11890 fprintf (dump_file, "\n");
11891 }
11892
11893 /* Add it to the chain of fixes. */
11894 fix->next = NULL;
11895
11896 if (minipool_fix_head != NULL)
11897 minipool_fix_tail->next = fix;
11898 else
11899 minipool_fix_head = fix;
11900
11901 minipool_fix_tail = fix;
11902 }
11903
11904 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11905 Returns the number of insns needed, or 99 if we don't know how to
11906 do it. */
11907 int
11908 arm_const_double_inline_cost (rtx val)
11909 {
11910 rtx lowpart, highpart;
11911 enum machine_mode mode;
11912
11913 mode = GET_MODE (val);
11914
11915 if (mode == VOIDmode)
11916 mode = DImode;
11917
11918 gcc_assert (GET_MODE_SIZE (mode) == 8);
11919
11920 lowpart = gen_lowpart (SImode, val);
11921 highpart = gen_highpart_mode (SImode, mode, val);
11922
11923 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11924 gcc_assert (GET_CODE (highpart) == CONST_INT);
11925
11926 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11927 NULL_RTX, NULL_RTX, 0, 0)
11928 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11929 NULL_RTX, NULL_RTX, 0, 0));
11930 }
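
/* Illustrative example: the DImode constant 0x0000000100000001 splits
   into two SImode halves that are both 1, each loadable by a single
   data-processing insn, so the returned cost would be 2 (assuming
   arm_gen_constant counts one insn per valid immediate).  */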
11931
11932 /* Return true if it is worthwhile to split a 64-bit constant into two
11933 32-bit operations. This is the case if optimizing for size, or
11934 if we have load delay slots, or if one 32-bit part can be done with
11935 a single data operation. */
11936 bool
11937 arm_const_double_by_parts (rtx val)
11938 {
11939 enum machine_mode mode = GET_MODE (val);
11940 rtx part;
11941
11942 if (optimize_size || arm_ld_sched)
11943 return true;
11944
11945 if (mode == VOIDmode)
11946 mode = DImode;
11947
11948 part = gen_highpart_mode (SImode, mode, val);
11949
11950 gcc_assert (GET_CODE (part) == CONST_INT);
11951
11952 if (const_ok_for_arm (INTVAL (part))
11953 || const_ok_for_arm (~INTVAL (part)))
11954 return true;
11955
11956 part = gen_lowpart (SImode, val);
11957
11958 gcc_assert (GET_CODE (part) == CONST_INT);
11959
11960 if (const_ok_for_arm (INTVAL (part))
11961 || const_ok_for_arm (~INTVAL (part)))
11962 return true;
11963
11964 return false;
11965 }
11966
11967 /* Return true if it is possible to inline both the high and low parts
11968 of a 64-bit constant into 32-bit data processing instructions. */
11969 bool
11970 arm_const_double_by_immediates (rtx val)
11971 {
11972 enum machine_mode mode = GET_MODE (val);
11973 rtx part;
11974
11975 if (mode == VOIDmode)
11976 mode = DImode;
11977
11978 part = gen_highpart_mode (SImode, mode, val);
11979
11980 gcc_assert (GET_CODE (part) == CONST_INT);
11981
11982 if (!const_ok_for_arm (INTVAL (part)))
11983 return false;
11984
11985 part = gen_lowpart (SImode, val);
11986
11987 gcc_assert (GET_CODE (part) == CONST_INT);
11988
11989 if (!const_ok_for_arm (INTVAL (part)))
11990 return false;
11991
11992 return true;
11993 }
11994
11995 /* Scan INSN and note any of its operands that need fixing.
11996 If DO_PUSHES is false we do not actually push any of the fixups
11997 needed. The function returns TRUE if any fixups were needed/pushed.
11998 This is used by arm_memory_load_p() which needs to know about loads
11999 of constants that will be converted into minipool loads. */
12000 static bool
12001 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12002 {
12003 bool result = false;
12004 int opno;
12005
12006 extract_insn (insn);
12007
12008 if (!constrain_operands (1))
12009 fatal_insn_not_found (insn);
12010
12011 if (recog_data.n_alternatives == 0)
12012 return false;
12013
12014 /* Fill in recog_op_alt with information about the constraints of
12015 this insn. */
12016 preprocess_constraints ();
12017
12018 for (opno = 0; opno < recog_data.n_operands; opno++)
12019 {
12020 /* Things we need to fix can only occur in inputs. */
12021 if (recog_data.operand_type[opno] != OP_IN)
12022 continue;
12023
12024 /* If this alternative is a memory reference, then any mention
12025 of constants in this alternative is really to fool reload
12026 into allowing us to accept one there. We need to fix them up
12027 now so that we output the right code. */
12028 if (recog_op_alt[opno][which_alternative].memory_ok)
12029 {
12030 rtx op = recog_data.operand[opno];
12031
12032 if (CONSTANT_P (op))
12033 {
12034 if (do_pushes)
12035 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12036 recog_data.operand_mode[opno], op);
12037 result = true;
12038 }
12039 else if (GET_CODE (op) == MEM
12040 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12041 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12042 {
12043 if (do_pushes)
12044 {
12045 rtx cop = avoid_constant_pool_reference (op);
12046
12047 /* Casting the address of something to a mode narrower
12048 than a word can cause avoid_constant_pool_reference()
12049 to return the pool reference itself. That's no good to
12050 us here. Let's just hope that we can use the
12051 constant pool value directly. */
12052 if (op == cop)
12053 cop = get_pool_constant (XEXP (op, 0));
12054
12055 push_minipool_fix (insn, address,
12056 recog_data.operand_loc[opno],
12057 recog_data.operand_mode[opno], cop);
12058 }
12059
12060 result = true;
12061 }
12062 }
12063 }
12064
12065 return result;
12066 }
12067
12068 /* Convert instructions to their cc-clobbering variant if possible, since
12069 that allows us to use smaller encodings. */
12070
12071 static void
12072 thumb2_reorg (void)
12073 {
12074 basic_block bb;
12075 regset_head live;
12076
12077 INIT_REG_SET (&live);
12078
12079 /* We are freeing block_for_insn in the toplev to keep compatibility
12080 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12081 compute_bb_for_insn ();
12082 df_analyze ();
12083
12084 FOR_EACH_BB (bb)
12085 {
12086 rtx insn;
12087 COPY_REG_SET (&live, DF_LR_OUT (bb));
12088 df_simulate_initialize_backwards (bb, &live);
12089 FOR_BB_INSNS_REVERSE (bb, insn)
12090 {
12091 if (NONJUMP_INSN_P (insn)
12092 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12093 {
12094 rtx pat = PATTERN (insn);
12095 if (GET_CODE (pat) == SET
12096 && low_register_operand (XEXP (pat, 0), SImode)
12097 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12098 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12099 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12100 {
12101 rtx dst = XEXP (pat, 0);
12102 rtx src = XEXP (pat, 1);
12103 rtx op0 = XEXP (src, 0);
12104 if (rtx_equal_p (dst, op0)
12105 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12106 {
12107 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12108 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12109 rtvec vec = gen_rtvec (2, pat, clobber);
12110 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12111 INSN_CODE (insn) = -1;
12112 }
12113 }
12114 }
12115 if (NONDEBUG_INSN_P (insn))
12116 df_simulate_one_insn_backwards (bb, insn, &live);
12117 }
12118 }
12119 CLEAR_REG_SET (&live);
12120 }
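
/* For instance, a SET such as r0 = r0 + r1 (both low registers) is
   rewritten here into a PARALLEL that also clobbers CC, allowing the
   16-bit flag-setting "adds" encoding to be chosen later, but only when
   the condition codes are known to be dead at that point.  */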
12121
12122 /* GCC puts the pool in the wrong place for ARM, since we can only
12123 load addresses a limited distance around the pc. We do some
12124 special munging to move the constant pool values to the correct
12125 point in the code. */
12126 static void
12127 arm_reorg (void)
12128 {
12129 rtx insn;
12130 HOST_WIDE_INT address = 0;
12131 Mfix * fix;
12132
12133 if (TARGET_THUMB2)
12134 thumb2_reorg ();
12135
12136 minipool_fix_head = minipool_fix_tail = NULL;
12137
12138 /* The first insn must always be a note, or the code below won't
12139 scan it properly. */
12140 insn = get_insns ();
12141 gcc_assert (GET_CODE (insn) == NOTE);
12142 minipool_pad = 0;
12143
12144 /* Scan all the insns and record the operands that will need fixing. */
12145 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12146 {
12147 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12148 && (arm_cirrus_insn_p (insn)
12149 || GET_CODE (insn) == JUMP_INSN
12150 || arm_memory_load_p (insn)))
12151 cirrus_reorg (insn);
12152
12153 if (GET_CODE (insn) == BARRIER)
12154 push_minipool_barrier (insn, address);
12155 else if (INSN_P (insn))
12156 {
12157 rtx table;
12158
12159 note_invalid_constants (insn, address, true);
12160 address += get_attr_length (insn);
12161
12162 /* If the insn is a vector jump, add the size of the table
12163 and skip the table. */
12164 if ((table = is_jump_table (insn)) != NULL)
12165 {
12166 address += get_jump_table_size (table);
12167 insn = table;
12168 }
12169 }
12170 }
12171
12172 fix = minipool_fix_head;
12173
12174 /* Now scan the fixups and perform the required changes. */
12175 while (fix)
12176 {
12177 Mfix * ftmp;
12178 Mfix * fdel;
12179 Mfix * last_added_fix;
12180 Mfix * last_barrier = NULL;
12181 Mfix * this_fix;
12182
12183 /* Skip any further barriers before the next fix. */
12184 while (fix && GET_CODE (fix->insn) == BARRIER)
12185 fix = fix->next;
12186
12187 /* No more fixes. */
12188 if (fix == NULL)
12189 break;
12190
12191 last_added_fix = NULL;
12192
12193 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12194 {
12195 if (GET_CODE (ftmp->insn) == BARRIER)
12196 {
12197 if (ftmp->address >= minipool_vector_head->max_address)
12198 break;
12199
12200 last_barrier = ftmp;
12201 }
12202 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12203 break;
12204
12205 last_added_fix = ftmp; /* Keep track of the last fix added. */
12206 }
12207
12208 /* If we found a barrier, drop back to that; any fixes that we
12209 could have reached but come after the barrier will now go in
12210 the next mini-pool. */
12211 if (last_barrier != NULL)
12212 {
12213 /* Reduce the refcount for those fixes that won't go into this
12214 pool after all. */
12215 for (fdel = last_barrier->next;
12216 fdel && fdel != ftmp;
12217 fdel = fdel->next)
12218 {
12219 fdel->minipool->refcount--;
12220 fdel->minipool = NULL;
12221 }
12222
12223 ftmp = last_barrier;
12224 }
12225 else
12226 {
12227 /* ftmp is the first fix that we can't fit into this pool and
12228 there are no natural barriers that we could use. Insert a
12229 new barrier in the code somewhere between the previous
12230 fix and this one, and arrange to jump around it. */
12231 HOST_WIDE_INT max_address;
12232
12233 /* The last item on the list of fixes must be a barrier, so
12234 we can never run off the end of the list of fixes without
12235 last_barrier being set. */
12236 gcc_assert (ftmp);
12237
12238 max_address = minipool_vector_head->max_address;
12239 /* Check that there isn't another fix that is in range that
12240 we couldn't fit into this pool because the pool was
12241 already too large: we need to put the pool before such an
12242 instruction. The pool itself may come just after the
12243 fix because create_fix_barrier also allows space for a
12244 jump instruction. */
12245 if (ftmp->address < max_address)
12246 max_address = ftmp->address + 1;
12247
12248 last_barrier = create_fix_barrier (last_added_fix, max_address);
12249 }
12250
12251 assign_minipool_offsets (last_barrier);
12252
12253 while (ftmp)
12254 {
12255 if (GET_CODE (ftmp->insn) != BARRIER
12256 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12257 == NULL))
12258 break;
12259
12260 ftmp = ftmp->next;
12261 }
12262
12263 /* Scan over the fixes we have identified for this pool, fixing them
12264 up and adding the constants to the pool itself. */
12265 for (this_fix = fix; this_fix && ftmp != this_fix;
12266 this_fix = this_fix->next)
12267 if (GET_CODE (this_fix->insn) != BARRIER)
12268 {
12269 rtx addr
12270 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12271 minipool_vector_label),
12272 this_fix->minipool->offset);
12273 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12274 }
12275
12276 dump_minipool (last_barrier->insn);
12277 fix = ftmp;
12278 }
12279
12280 /* From now on we must synthesize any constants that we can't handle
12281 directly. This can happen if the RTL gets split during final
12282 instruction generation. */
12283 after_arm_reorg = 1;
12284
12285 /* Free the minipool memory. */
12286 obstack_free (&minipool_obstack, minipool_startobj);
12287 }
12288 \f
12289 /* Routines to output assembly language. */
12290
12291 /* If the rtx is the correct value then return the string of the number.
12292 In this way we can ensure that valid double constants are generated even
12293 when cross compiling. */
12294 const char *
12295 fp_immediate_constant (rtx x)
12296 {
12297 REAL_VALUE_TYPE r;
12298 int i;
12299
12300 if (!fp_consts_inited)
12301 init_fp_table ();
12302
12303 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12304 for (i = 0; i < 8; i++)
12305 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12306 return strings_fp[i];
12307
12308 gcc_unreachable ();
12309 }
12310
12311 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12312 static const char *
12313 fp_const_from_val (REAL_VALUE_TYPE *r)
12314 {
12315 int i;
12316
12317 if (!fp_consts_inited)
12318 init_fp_table ();
12319
12320 for (i = 0; i < 8; i++)
12321 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12322 return strings_fp[i];
12323
12324 gcc_unreachable ();
12325 }
12326
12327 /* Output the operands of a LDM/STM instruction to STREAM.
12328 MASK is the ARM register set mask of which only bits 0-15 are important.
12329 REG is the base register, either the frame pointer or the stack pointer,
12330 INSTR is the possibly suffixed load or store instruction.
12331 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12332
12333 static void
12334 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12335 unsigned long mask, int rfe)
12336 {
12337 unsigned i;
12338 bool not_first = FALSE;
12339
12340 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12341 fputc ('\t', stream);
12342 asm_fprintf (stream, instr, reg);
12343 fputc ('{', stream);
12344
12345 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12346 if (mask & (1 << i))
12347 {
12348 if (not_first)
12349 fprintf (stream, ", ");
12350
12351 asm_fprintf (stream, "%r", i);
12352 not_first = TRUE;
12353 }
12354
12355 if (rfe)
12356 fprintf (stream, "}^\n");
12357 else
12358 fprintf (stream, "}\n");
12359 }
12360
12361
12362 /* Output a FLDMD instruction to STREAM.
12363 BASE is the register containing the address.
12364 REG and COUNT specify the register range.
12365 Extra registers may be added to avoid hardware bugs.
12366
12367 We output FLDMD even for ARMv5 VFP implementations. Although
12368 FLDMD is technically not supported until ARMv6, it is believed
12369 that all VFP implementations support its use in this context. */
12370
12371 static void
12372 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12373 {
12374 int i;
12375
12376 /* Workaround ARM10 VFPr1 bug. */
12377 if (count == 2 && !arm_arch6)
12378 {
12379 if (reg == 15)
12380 reg--;
12381 count++;
12382 }
12383
12384 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12385 load into multiple parts if we have to handle more than 16 registers. */
12386 if (count > 16)
12387 {
12388 vfp_output_fldmd (stream, base, reg, 16);
12389 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12390 return;
12391 }
12392
12393 fputc ('\t', stream);
12394 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12395
12396 for (i = reg; i < reg + count; i++)
12397 {
12398 if (i > reg)
12399 fputs (", ", stream);
12400 asm_fprintf (stream, "d%d", i);
12401 }
12402 fputs ("}\n", stream);
12403
12404 }
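
/* Illustrative example: on a pre-ARMv6 VFP (the ARM10 VFPr1
   workaround), a request for reg 8, count 2 is widened to three
   registers and emits something like "fldmfdd rN!, {d8, d9, d10}",
   where rN is the base register.  */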
12405
12406
12407 /* Output the assembly for a store multiple. */
12408
12409 const char *
12410 vfp_output_fstmd (rtx * operands)
12411 {
12412 char pattern[100];
12413 int p;
12414 int base;
12415 int i;
12416
12417 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12418 p = strlen (pattern);
12419
12420 gcc_assert (GET_CODE (operands[1]) == REG);
12421
12422 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12423 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12424 {
12425 p += sprintf (&pattern[p], ", d%d", base + i);
12426 }
12427 strcpy (&pattern[p], "}");
12428
12429 output_asm_insn (pattern, operands);
12430 return "";
12431 }
12432
12433
12434 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12435 number of bytes pushed. */
12436
12437 static int
12438 vfp_emit_fstmd (int base_reg, int count)
12439 {
12440 rtx par;
12441 rtx dwarf;
12442 rtx tmp, reg;
12443 int i;
12444
12445 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12446 register pairs are stored by a store multiple insn. We avoid this
12447 by pushing an extra pair. */
12448 if (count == 2 && !arm_arch6)
12449 {
12450 if (base_reg == LAST_VFP_REGNUM - 3)
12451 base_reg -= 2;
12452 count++;
12453 }
12454
12455 /* FSTMD may not store more than 16 doubleword registers at once. Split
12456 larger stores into multiple parts (up to a maximum of two, in
12457 practice). */
12458 if (count > 16)
12459 {
12460 int saved;
12461 /* NOTE: base_reg is an internal register number, so each D register
12462 counts as 2. */
12463 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12464 saved += vfp_emit_fstmd (base_reg, 16);
12465 return saved;
12466 }
12467
12468 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12469 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12470
12471 reg = gen_rtx_REG (DFmode, base_reg);
12472 base_reg += 2;
12473
12474 XVECEXP (par, 0, 0)
12475 = gen_rtx_SET (VOIDmode,
12476 gen_frame_mem
12477 (BLKmode,
12478 gen_rtx_PRE_MODIFY (Pmode,
12479 stack_pointer_rtx,
12480 plus_constant
12481 (stack_pointer_rtx,
12482 - (count * 8)))
12483 ),
12484 gen_rtx_UNSPEC (BLKmode,
12485 gen_rtvec (1, reg),
12486 UNSPEC_PUSH_MULT));
12487
12488 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12489 plus_constant (stack_pointer_rtx, -(count * 8)));
12490 RTX_FRAME_RELATED_P (tmp) = 1;
12491 XVECEXP (dwarf, 0, 0) = tmp;
12492
12493 tmp = gen_rtx_SET (VOIDmode,
12494 gen_frame_mem (DFmode, stack_pointer_rtx),
12495 reg);
12496 RTX_FRAME_RELATED_P (tmp) = 1;
12497 XVECEXP (dwarf, 0, 1) = tmp;
12498
12499 for (i = 1; i < count; i++)
12500 {
12501 reg = gen_rtx_REG (DFmode, base_reg);
12502 base_reg += 2;
12503 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12504
12505 tmp = gen_rtx_SET (VOIDmode,
12506 gen_frame_mem (DFmode,
12507 plus_constant (stack_pointer_rtx,
12508 i * 8)),
12509 reg);
12510 RTX_FRAME_RELATED_P (tmp) = 1;
12511 XVECEXP (dwarf, 0, i + 1) = tmp;
12512 }
12513
12514 par = emit_insn (par);
12515 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12516 RTX_FRAME_RELATED_P (par) = 1;
12517
12518 return count * 8;
12519 }
12520
12521 /* Emit a call instruction with pattern PAT. ADDR is the address of
12522 the call target. */
12523
12524 void
12525 arm_emit_call_insn (rtx pat, rtx addr)
12526 {
12527 rtx insn;
12528
12529 insn = emit_call_insn (pat);
12530
12531 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12532 If the call might use such an entry, add a use of the PIC register
12533 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12534 if (TARGET_VXWORKS_RTP
12535 && flag_pic
12536 && GET_CODE (addr) == SYMBOL_REF
12537 && (SYMBOL_REF_DECL (addr)
12538 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12539 : !SYMBOL_REF_LOCAL_P (addr)))
12540 {
12541 require_pic_register ();
12542 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12543 }
12544 }
12545
12546 /* Output a 'call' insn. */
12547 const char *
12548 output_call (rtx *operands)
12549 {
12550 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12551
12552 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12553 if (REGNO (operands[0]) == LR_REGNUM)
12554 {
12555 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12556 output_asm_insn ("mov%?\t%0, %|lr", operands);
12557 }
12558
12559 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12560
12561 if (TARGET_INTERWORK || arm_arch4t)
12562 output_asm_insn ("bx%?\t%0", operands);
12563 else
12564 output_asm_insn ("mov%?\t%|pc, %0", operands);
12565
12566 return "";
12567 }
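
/* Roughly, for a call through r3 on a pre-ARMv5 target this emits
   "mov lr, pc" followed by "bx r3" (when interworking or ARMv4T is
   available) or "mov pc, r3" otherwise; a call through lr is first
   redirected via ip, which the call clobbers anyway.  */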
12568
12569 /* Output a 'call' insn that is a reference in memory. This is
12570 disabled for ARMv5; we prefer a blx instead because otherwise
12571 there's a significant performance overhead. */
12572 const char *
12573 output_call_mem (rtx *operands)
12574 {
12575 gcc_assert (!arm_arch5);
12576 if (TARGET_INTERWORK)
12577 {
12578 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12579 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12580 output_asm_insn ("bx%?\t%|ip", operands);
12581 }
12582 else if (regno_use_in (LR_REGNUM, operands[0]))
12583 {
12584 /* LR is used in the memory address. We load the address in the
12585 first instruction. It's safe to use IP as the target of the
12586 load since the call will kill it anyway. */
12587 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12588 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12589 if (arm_arch4t)
12590 output_asm_insn ("bx%?\t%|ip", operands);
12591 else
12592 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12593 }
12594 else
12595 {
12596 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12597 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12598 }
12599
12600 return "";
12601 }
12602
12603
12604 /* Output a move from arm registers to an fpa register.
12605 OPERANDS[0] is an fpa register.
12606 OPERANDS[1] is the first register of an arm register pair. */
12607 const char *
12608 output_mov_long_double_fpa_from_arm (rtx *operands)
12609 {
12610 int arm_reg0 = REGNO (operands[1]);
12611 rtx ops[3];
12612
12613 gcc_assert (arm_reg0 != IP_REGNUM);
12614
12615 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12616 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12617 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12618
12619 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12620 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12621
12622 return "";
12623 }
12624
12625 /* Output a move from an fpa register to arm registers.
12626 OPERANDS[0] is the first register of an arm register pair.
12627 OPERANDS[1] is an fpa register. */
12628 const char *
12629 output_mov_long_double_arm_from_fpa (rtx *operands)
12630 {
12631 int arm_reg0 = REGNO (operands[0]);
12632 rtx ops[3];
12633
12634 gcc_assert (arm_reg0 != IP_REGNUM);
12635
12636 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12637 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12638 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12639
12640 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12641 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12642 return "";
12643 }
12644
12645 /* Output a move from arm registers to arm registers of a long double.
12646 OPERANDS[0] is the destination.
12647 OPERANDS[1] is the source. */
12648 const char *
12649 output_mov_long_double_arm_from_arm (rtx *operands)
12650 {
12651 /* We have to be careful here because the two might overlap. */
12652 int dest_start = REGNO (operands[0]);
12653 int src_start = REGNO (operands[1]);
12654 rtx ops[2];
12655 int i;
12656
12657 if (dest_start < src_start)
12658 {
12659 for (i = 0; i < 3; i++)
12660 {
12661 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12662 ops[1] = gen_rtx_REG (SImode, src_start + i);
12663 output_asm_insn ("mov%?\t%0, %1", ops);
12664 }
12665 }
12666 else
12667 {
12668 for (i = 2; i >= 0; i--)
12669 {
12670 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12671 ops[1] = gen_rtx_REG (SImode, src_start + i);
12672 output_asm_insn ("mov%?\t%0, %1", ops);
12673 }
12674 }
12675
12676 return "";
12677 }
12678
12679 void
12680 arm_emit_movpair (rtx dest, rtx src)
12681 {
12682 /* If the src is an immediate, simplify it. */
12683 if (CONST_INT_P (src))
12684 {
12685 HOST_WIDE_INT val = INTVAL (src);
12686 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12687 if ((val >> 16) & 0x0000ffff)
12688 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12689 GEN_INT (16)),
12690 GEN_INT ((val >> 16) & 0x0000ffff));
12691 return;
12692 }
12693 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12694 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12695 }
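
/* Illustrative example: for src = 0x12345678 this emits a set of the
   low half (0x5678) followed by a ZERO_EXTRACT set of the upper 16
   bits (0x1234), which normally become MOVW/MOVT; a symbolic src is
   instead loaded with HIGH and LO_SUM sets.  */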
12696
12697 /* Output a move from arm registers to an fpa register.
12698 OPERANDS[0] is an fpa register.
12699 OPERANDS[1] is the first register of an arm register pair. */
12700 const char *
12701 output_mov_double_fpa_from_arm (rtx *operands)
12702 {
12703 int arm_reg0 = REGNO (operands[1]);
12704 rtx ops[2];
12705
12706 gcc_assert (arm_reg0 != IP_REGNUM);
12707
12708 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12709 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12710 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12711 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12712 return "";
12713 }
12714
12715 /* Output a move from an fpa register to arm registers.
12716 OPERANDS[0] is the first register of an arm register pair.
12717 OPERANDS[1] is an fpa register. */
12718 const char *
12719 output_mov_double_arm_from_fpa (rtx *operands)
12720 {
12721 int arm_reg0 = REGNO (operands[0]);
12722 rtx ops[2];
12723
12724 gcc_assert (arm_reg0 != IP_REGNUM);
12725
12726 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12727 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12728 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12729 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12730 return "";
12731 }
12732
12733 /* Output a move between double words.
12734 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12735 or MEM<-REG and all MEMs must be offsettable addresses. */
12736 const char *
12737 output_move_double (rtx *operands)
12738 {
12739 enum rtx_code code0 = GET_CODE (operands[0]);
12740 enum rtx_code code1 = GET_CODE (operands[1]);
12741 rtx otherops[3];
12742
12743 if (code0 == REG)
12744 {
12745 unsigned int reg0 = REGNO (operands[0]);
12746
12747 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12748
12749 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12750
12751 switch (GET_CODE (XEXP (operands[1], 0)))
12752 {
12753 case REG:
12754 if (TARGET_LDRD
12755 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12756 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12757 else
12758 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12759 break;
12760
12761 case PRE_INC:
12762 gcc_assert (TARGET_LDRD);
12763 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12764 break;
12765
12766 case PRE_DEC:
12767 if (TARGET_LDRD)
12768 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12769 else
12770 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12771 break;
12772
12773 case POST_INC:
12774 if (TARGET_LDRD)
12775 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12776 else
12777 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12778 break;
12779
12780 case POST_DEC:
12781 gcc_assert (TARGET_LDRD);
12782 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12783 break;
12784
12785 case PRE_MODIFY:
12786 case POST_MODIFY:
12787 /* Autoincrement addressing modes should never have overlapping
12788 base and destination registers, and overlapping index registers
12789 are already prohibited, so this doesn't need to worry about
12790 fix_cm3_ldrd. */
12791 otherops[0] = operands[0];
12792 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12793 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12794
12795 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12796 {
12797 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12798 {
12799 /* Registers overlap so split out the increment. */
12800 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12801 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12802 }
12803 else
12804 {
12805 /* Use a single insn if we can.
12806 FIXME: IWMMXT allows offsets larger than ldrd can
12807 handle, fix these up with a pair of ldr. */
12808 if (TARGET_THUMB2
12809 || GET_CODE (otherops[2]) != CONST_INT
12810 || (INTVAL (otherops[2]) > -256
12811 && INTVAL (otherops[2]) < 256))
12812 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12813 else
12814 {
12815 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12816 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12817 }
12818 }
12819 }
12820 else
12821 {
12822 /* Use a single insn if we can.
12823 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12824 fix these up with a pair of ldr. */
12825 if (TARGET_THUMB2
12826 || GET_CODE (otherops[2]) != CONST_INT
12827 || (INTVAL (otherops[2]) > -256
12828 && INTVAL (otherops[2]) < 256))
12829 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12830 else
12831 {
12832 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12833 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12834 }
12835 }
12836 break;
12837
12838 case LABEL_REF:
12839 case CONST:
12840 /* We might be able to use ldrd %0, %1 here. However the range is
12841 different to ldr/adr, and it is broken on some ARMv7-M
12842 implementations. */
12843 /* Use the second register of the pair to avoid problematic
12844 overlap. */
12845 otherops[1] = operands[1];
12846 output_asm_insn ("adr%?\t%0, %1", otherops);
12847 operands[1] = otherops[0];
12848 if (TARGET_LDRD)
12849 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12850 else
12851 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12852 break;
12853
12854 /* ??? This needs checking for thumb2. */
12855 default:
12856 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12857 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12858 {
12859 otherops[0] = operands[0];
12860 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12861 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12862
12863 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12864 {
12865 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12866 {
12867 switch ((int) INTVAL (otherops[2]))
12868 {
12869 case -8:
12870 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12871 return "";
12872 case -4:
12873 if (TARGET_THUMB2)
12874 break;
12875 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12876 return "";
12877 case 4:
12878 if (TARGET_THUMB2)
12879 break;
12880 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12881 return "";
12882 }
12883 }
12884 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12885 operands[1] = otherops[0];
12886 if (TARGET_LDRD
12887 && (GET_CODE (otherops[2]) == REG
12888 || TARGET_THUMB2
12889 || (GET_CODE (otherops[2]) == CONST_INT
12890 && INTVAL (otherops[2]) > -256
12891 && INTVAL (otherops[2]) < 256)))
12892 {
12893 if (reg_overlap_mentioned_p (operands[0],
12894 otherops[2]))
12895 {
12896 rtx tmp;
12897 /* Swap base and index registers over to
12898 avoid a conflict. */
12899 tmp = otherops[1];
12900 otherops[1] = otherops[2];
12901 otherops[2] = tmp;
12902 }
12903 /* If both registers conflict, it will usually
12904 have been fixed by a splitter. */
12905 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12906 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12907 {
12908 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12909 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12910 }
12911 else
12912 {
12913 otherops[0] = operands[0];
12914 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12915 }
12916 return "";
12917 }
12918
12919 if (GET_CODE (otherops[2]) == CONST_INT)
12920 {
12921 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12922 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12923 else
12924 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12925 }
12926 else
12927 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12928 }
12929 else
12930 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12931
12932 if (TARGET_LDRD)
12933 return "ldr%(d%)\t%0, [%1]";
12934
12935 return "ldm%(ia%)\t%1, %M0";
12936 }
12937 else
12938 {
12939 otherops[1] = adjust_address (operands[1], SImode, 4);
12940 /* Take care of overlapping base/data reg. */
12941 if (reg_mentioned_p (operands[0], operands[1]))
12942 {
12943 output_asm_insn ("ldr%?\t%0, %1", otherops);
12944 output_asm_insn ("ldr%?\t%0, %1", operands);
12945 }
12946 else
12947 {
12948 output_asm_insn ("ldr%?\t%0, %1", operands);
12949 output_asm_insn ("ldr%?\t%0, %1", otherops);
12950 }
12951 }
12952 }
12953 }
12954 else
12955 {
12956 /* Constraints should ensure this. */
12957 gcc_assert (code0 == MEM && code1 == REG);
12958 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12959
12960 switch (GET_CODE (XEXP (operands[0], 0)))
12961 {
12962 case REG:
12963 if (TARGET_LDRD)
12964 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12965 else
12966 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12967 break;
12968
12969 case PRE_INC:
12970 gcc_assert (TARGET_LDRD);
12971 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12972 break;
12973
12974 case PRE_DEC:
12975 if (TARGET_LDRD)
12976 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12977 else
12978 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12979 break;
12980
12981 case POST_INC:
12982 if (TARGET_LDRD)
12983 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12984 else
12985 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12986 break;
12987
12988 case POST_DEC:
12989 gcc_assert (TARGET_LDRD);
12990 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12991 break;
12992
12993 case PRE_MODIFY:
12994 case POST_MODIFY:
12995 otherops[0] = operands[1];
12996 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12997 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12998
12999 /* IWMMXT allows offsets larger than ldrd can handle,
13000 fix these up with a pair of ldr. */
13001 if (!TARGET_THUMB2
13002 && GET_CODE (otherops[2]) == CONST_INT
13003 && (INTVAL (otherops[2]) <= -256
13004 || INTVAL (otherops[2]) >= 256))
13005 {
13006 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13007 {
13008 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13009 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13010 }
13011 else
13012 {
13013 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13014 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13015 }
13016 }
13017 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13018 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13019 else
13020 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13021 break;
13022
13023 case PLUS:
13024 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13025 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13026 {
13027 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13028 {
13029 case -8:
13030 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13031 return "";
13032
13033 case -4:
13034 if (TARGET_THUMB2)
13035 break;
13036 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13037 return "";
13038
13039 case 4:
13040 if (TARGET_THUMB2)
13041 break;
13042 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13043 return "";
13044 }
13045 }
13046 if (TARGET_LDRD
13047 && (GET_CODE (otherops[2]) == REG
13048 || TARGET_THUMB2
13049 || (GET_CODE (otherops[2]) == CONST_INT
13050 && INTVAL (otherops[2]) > -256
13051 && INTVAL (otherops[2]) < 256)))
13052 {
13053 otherops[0] = operands[1];
13054 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13055 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13056 return "";
13057 }
13058 /* Fall through */
13059
13060 default:
13061 otherops[0] = adjust_address (operands[0], SImode, 4);
13062 otherops[1] = operands[1];
13063 output_asm_insn ("str%?\t%1, %0", operands);
13064 output_asm_insn ("str%?\t%H1, %0", otherops);
13065 }
13066 }
13067
13068 return "";
13069 }
13070
13071 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13072 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13073
13074 const char *
13075 output_move_quad (rtx *operands)
13076 {
13077 if (REG_P (operands[0]))
13078 {
13079 /* Load, or reg->reg move. */
13080
13081 if (MEM_P (operands[1]))
13082 {
13083 switch (GET_CODE (XEXP (operands[1], 0)))
13084 {
13085 case REG:
13086 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13087 break;
13088
13089 case LABEL_REF:
13090 case CONST:
13091 output_asm_insn ("adr%?\t%0, %1", operands);
13092 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13093 break;
13094
13095 default:
13096 gcc_unreachable ();
13097 }
13098 }
13099 else
13100 {
13101 rtx ops[2];
13102 int dest, src, i;
13103
13104 gcc_assert (REG_P (operands[1]));
13105
13106 dest = REGNO (operands[0]);
13107 src = REGNO (operands[1]);
13108
13109 /* This seems pretty dumb, but hopefully GCC won't try to do it
13110 very often. */
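/* Copy in an order that never clobbers a source register before it has
   been read: low-to-high when the destination range starts below the
   source range, high-to-low otherwise.  */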
13111 if (dest < src)
13112 for (i = 0; i < 4; i++)
13113 {
13114 ops[0] = gen_rtx_REG (SImode, dest + i);
13115 ops[1] = gen_rtx_REG (SImode, src + i);
13116 output_asm_insn ("mov%?\t%0, %1", ops);
13117 }
13118 else
13119 for (i = 3; i >= 0; i--)
13120 {
13121 ops[0] = gen_rtx_REG (SImode, dest + i);
13122 ops[1] = gen_rtx_REG (SImode, src + i);
13123 output_asm_insn ("mov%?\t%0, %1", ops);
13124 }
13125 }
13126 }
13127 else
13128 {
13129 gcc_assert (MEM_P (operands[0]));
13130 gcc_assert (REG_P (operands[1]));
13131 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13132
13133 switch (GET_CODE (XEXP (operands[0], 0)))
13134 {
13135 case REG:
13136 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13137 break;
13138
13139 default:
13140 gcc_unreachable ();
13141 }
13142 }
13143
13144 return "";
13145 }
13146
13147 /* Output a VFP load or store instruction. */
13148
13149 const char *
13150 output_move_vfp (rtx *operands)
13151 {
13152 rtx reg, mem, addr, ops[2];
13153 int load = REG_P (operands[0]);
13154 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13155 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13156 const char *templ;
13157 char buff[50];
13158 enum machine_mode mode;
13159
13160 reg = operands[!load];
13161 mem = operands[load];
13162
13163 mode = GET_MODE (reg);
13164
13165 gcc_assert (REG_P (reg));
13166 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13167 gcc_assert (mode == SFmode
13168 || mode == DFmode
13169 || mode == SImode
13170 || mode == DImode
13171 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13172 gcc_assert (MEM_P (mem));
13173
13174 addr = XEXP (mem, 0);
13175
13176 switch (GET_CODE (addr))
13177 {
13178 case PRE_DEC:
13179 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13180 ops[0] = XEXP (addr, 0);
13181 ops[1] = reg;
13182 break;
13183
13184 case POST_INC:
13185 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13186 ops[0] = XEXP (addr, 0);
13187 ops[1] = reg;
13188 break;
13189
13190 default:
13191 templ = "f%s%c%%?\t%%%s0, %%1%s";
13192 ops[0] = reg;
13193 ops[1] = mem;
13194 break;
13195 }
13196
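/* For example (illustrative): with the default base+offset address a
   DFmode load expands to "fldd%?\t%P0, %1" and an SFmode store to
   "fsts%?\t%0, %1"; integer-mode accesses gain a trailing "%@ int"
   assembler comment.  */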
13197 sprintf (buff, templ,
13198 load ? "ld" : "st",
13199 dp ? 'd' : 's',
13200 dp ? "P" : "",
13201 integer_p ? "\t%@ int" : "");
13202 output_asm_insn (buff, ops);
13203
13204 return "";
13205 }
13206
13207 /* Output a Neon quad-word load or store, or a load or store for
13208 larger structure modes.
13209
13210 WARNING: The ordering of elements is weird in big-endian mode,
13211 because we use VSTM, as required by the EABI. GCC RTL defines
13212 element ordering based on in-memory order. This can differ
13213 from the architectural ordering of elements within a NEON register.
13214 The intrinsics defined in arm_neon.h use the NEON register element
13215 ordering, not the GCC RTL element ordering.
13216
13217 For example, the in-memory ordering of a big-endian quadword
13218 vector with 16-bit elements when stored from register pair {d0,d1}
13219 will be (lowest address first, d0[N] is NEON register element N):
13220
13221 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13222
13223 When necessary, quadword registers (dN, dN+1) are moved to ARM
13224 registers from rN in the order:
13225
13226 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13227
13228 So that STM/LDM can be used on vectors in ARM registers, and the
13229 same memory layout will result as if VSTM/VLDM were used. */
13230
13231 const char *
13232 output_move_neon (rtx *operands)
13233 {
13234 rtx reg, mem, addr, ops[2];
13235 int regno, load = REG_P (operands[0]);
13236 const char *templ;
13237 char buff[50];
13238 enum machine_mode mode;
13239
13240 reg = operands[!load];
13241 mem = operands[load];
13242
13243 mode = GET_MODE (reg);
13244
13245 gcc_assert (REG_P (reg));
13246 regno = REGNO (reg);
13247 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13248 || NEON_REGNO_OK_FOR_QUAD (regno));
13249 gcc_assert (VALID_NEON_DREG_MODE (mode)
13250 || VALID_NEON_QREG_MODE (mode)
13251 || VALID_NEON_STRUCT_MODE (mode));
13252 gcc_assert (MEM_P (mem));
13253
13254 addr = XEXP (mem, 0);
13255
13256 /* Strip off const from addresses like (const (plus (...))). */
13257 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13258 addr = XEXP (addr, 0);
13259
13260 switch (GET_CODE (addr))
13261 {
13262 case POST_INC:
13263 templ = "v%smia%%?\t%%0!, %%h1";
13264 ops[0] = XEXP (addr, 0);
13265 ops[1] = reg;
13266 break;
13267
13268 case PRE_DEC:
13269 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13270 templ = "v%smdb%%?\t%%0!, %%h1";
13271 ops[0] = XEXP (addr, 0);
13272 ops[1] = reg;
13273 break;
13274
13275 case POST_MODIFY:
13276 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13277 gcc_unreachable ();
13278
13279 case LABEL_REF:
13280 case PLUS:
13281 {
13282 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13283 int i;
13284 int overlap = -1;
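/* Emit the non-overlapping pieces first; if one destination D-register
   is also the address register, defer that piece to the end so that a
   load does not clobber the base address before the other transfers.  */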
13285 for (i = 0; i < nregs; i++)
13286 {
13287 /* We're only using DImode here because it's a convenient size. */
13288 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13289 ops[1] = adjust_address (mem, DImode, 8 * i);
13290 if (reg_overlap_mentioned_p (ops[0], mem))
13291 {
13292 gcc_assert (overlap == -1);
13293 overlap = i;
13294 }
13295 else
13296 {
13297 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13298 output_asm_insn (buff, ops);
13299 }
13300 }
13301 if (overlap != -1)
13302 {
13303 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13304 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13305 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13306 output_asm_insn (buff, ops);
13307 }
13308
13309 return "";
13310 }
13311
13312 default:
13313 templ = "v%smia%%?\t%%m0, %%h1";
13314 ops[0] = mem;
13315 ops[1] = reg;
13316 }
13317
13318 sprintf (buff, templ, load ? "ld" : "st");
13319 output_asm_insn (buff, ops);
13320
13321 return "";
13322 }
13323
13324 /* Compute and return the length of neon_mov<mode>, where <mode> is
13325 one of VSTRUCT modes: EI, OI, CI or XI. */
13326 int
13327 arm_attr_length_move_neon (rtx insn)
13328 {
13329 rtx reg, mem, addr;
13330 int load;
13331 enum machine_mode mode;
13332
13333 extract_insn_cached (insn);
13334
13335 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13336 {
13337 mode = GET_MODE (recog_data.operand[0]);
13338 switch (mode)
13339 {
13340 case EImode:
13341 case OImode:
13342 return 8;
13343 case CImode:
13344 return 12;
13345 case XImode:
13346 return 16;
13347 default:
13348 gcc_unreachable ();
13349 }
13350 }
13351
13352 load = REG_P (recog_data.operand[0]);
13353 reg = recog_data.operand[!load];
13354 mem = recog_data.operand[load];
13355
13356 gcc_assert (MEM_P (mem));
13357
13358 mode = GET_MODE (reg);
13359 addr = XEXP (mem, 0);
13360
13361 /* Strip off const from addresses like (const (plus (...))). */
13362 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13363 addr = XEXP (addr, 0);
13364
13365 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13366 {
13367 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13368 return insns * 4;
13369 }
13370 else
13371 return 4;
13372 }
13373
13374 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13375 return zero. */
13376
13377 int
13378 arm_address_offset_is_imm (rtx insn)
13379 {
13380 rtx mem, addr;
13381
13382 extract_insn_cached (insn);
13383
13384 if (REG_P (recog_data.operand[0]))
13385 return 0;
13386
13387 mem = recog_data.operand[0];
13388
13389 gcc_assert (MEM_P (mem));
13390
13391 addr = XEXP (mem, 0);
13392
13393 if (GET_CODE (addr) == REG
13394 || (GET_CODE (addr) == PLUS
13395 && GET_CODE (XEXP (addr, 0)) == REG
13396 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13397 return 1;
13398 else
13399 return 0;
13400 }
13401
13402 /* Output an ADD r, s, #n where n may be too big for one instruction.
13403 If adding zero to one register, output nothing. */
13404 const char *
13405 output_add_immediate (rtx *operands)
13406 {
13407 HOST_WIDE_INT n = INTVAL (operands[2]);
13408
13409 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13410 {
13411 if (n < 0)
13412 output_multi_immediate (operands,
13413 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13414 -n);
13415 else
13416 output_multi_immediate (operands,
13417 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13418 n);
13419 }
13420
13421 return "";
13422 }
13423
13424 /* Output a multiple immediate operation.
13425 OPERANDS is the vector of operands referred to in the output patterns.
13426 INSTR1 is the output pattern to use for the first constant.
13427 INSTR2 is the output pattern to use for subsequent constants.
13428 IMMED_OP is the index of the constant slot in OPERANDS.
13429 N is the constant value. */
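/* For example (illustrative): splitting the constant 0x12345 emits three
   instructions using the immediates #0x45, #0x2300 and #0x10000, each of
   which is an 8-bit value at an even bit position and therefore a valid
   ARM data-processing immediate.  */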
13430 static const char *
13431 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13432 int immed_op, HOST_WIDE_INT n)
13433 {
13434 #if HOST_BITS_PER_WIDE_INT > 32
13435 n &= 0xffffffff;
13436 #endif
13437
13438 if (n == 0)
13439 {
13440 /* Quick and easy output. */
13441 operands[immed_op] = const0_rtx;
13442 output_asm_insn (instr1, operands);
13443 }
13444 else
13445 {
13446 int i;
13447 const char * instr = instr1;
13448
13449 /* Note that n is never zero here (which would give no output). */
13450 for (i = 0; i < 32; i += 2)
13451 {
13452 if (n & (3 << i))
13453 {
13454 operands[immed_op] = GEN_INT (n & (255 << i));
13455 output_asm_insn (instr, operands);
13456 instr = instr2;
13457 i += 6;
13458 }
13459 }
13460 }
13461
13462 return "";
13463 }
13464
13465 /* Return the name of a shifter operation. */
13466 static const char *
13467 arm_shift_nmem (enum rtx_code code)
13468 {
13469 switch (code)
13470 {
13471 case ASHIFT:
13472 return ARM_LSL_NAME;
13473
13474 case ASHIFTRT:
13475 return "asr";
13476
13477 case LSHIFTRT:
13478 return "lsr";
13479
13480 case ROTATERT:
13481 return "ror";
13482
13483 default:
13484 abort ();
13485 }
13486 }
13487
13488 /* Return the appropriate ARM instruction for the operation code.
13489 The returned result should not be overwritten. OP is the rtx of the
13490 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13491 was shifted. */
13492 const char *
13493 arithmetic_instr (rtx op, int shift_first_arg)
13494 {
13495 switch (GET_CODE (op))
13496 {
13497 case PLUS:
13498 return "add";
13499
13500 case MINUS:
13501 return shift_first_arg ? "rsb" : "sub";
13502
13503 case IOR:
13504 return "orr";
13505
13506 case XOR:
13507 return "eor";
13508
13509 case AND:
13510 return "and";
13511
13512 case ASHIFT:
13513 case ASHIFTRT:
13514 case LSHIFTRT:
13515 case ROTATERT:
13516 return arm_shift_nmem (GET_CODE (op));
13517
13518 default:
13519 gcc_unreachable ();
13520 }
13521 }
13522
13523 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13524 for the operation code. The returned result should not be overwritten.
13525 OP is the rtx of the shift operation.
13526 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
13527 will hold the (possibly adjusted) constant shift amount. */
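/* For example (illustrative): (ashiftrt x (const_int 3)) gives "asr" with
   *AMOUNTP == 3, (mult x (const_int 8)) gives "lsl" with *AMOUNTP == 3,
   and (rotate x (const_int 8)) gives "ror" with *AMOUNTP == 24.  */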
13528 static const char *
13529 shift_op (rtx op, HOST_WIDE_INT *amountp)
13530 {
13531 const char * mnem;
13532 enum rtx_code code = GET_CODE (op);
13533
13534 switch (GET_CODE (XEXP (op, 1)))
13535 {
13536 case REG:
13537 case SUBREG:
13538 *amountp = -1;
13539 break;
13540
13541 case CONST_INT:
13542 *amountp = INTVAL (XEXP (op, 1));
13543 break;
13544
13545 default:
13546 gcc_unreachable ();
13547 }
13548
13549 switch (code)
13550 {
13551 case ROTATE:
13552 gcc_assert (*amountp != -1);
13553 *amountp = 32 - *amountp;
13554 code = ROTATERT;
13555
13556 /* Fall through. */
13557
13558 case ASHIFT:
13559 case ASHIFTRT:
13560 case LSHIFTRT:
13561 case ROTATERT:
13562 mnem = arm_shift_nmem (code);
13563 break;
13564
13565 case MULT:
13566 /* We never have to worry about the amount being other than a
13567 power of 2, since this case can never be reloaded from a reg. */
13568 gcc_assert (*amountp != -1);
13569 *amountp = int_log2 (*amountp);
13570 return ARM_LSL_NAME;
13571
13572 default:
13573 gcc_unreachable ();
13574 }
13575
13576 if (*amountp != -1)
13577 {
13578 /* This is not 100% correct, but follows from the desire to merge
13579 multiplication by a power of 2 with the recognizer for a
13580 shift. >=32 is not a valid shift for "lsl", so we must try and
13581 output a shift that produces the correct arithmetical result.
13582 Using lsr #32 is identical except for the fact that the carry bit
13583 is not set correctly if we set the flags; but we never use the
13584 carry bit from such an operation, so we can ignore that. */
13585 if (code == ROTATERT)
13586 /* Rotate is just modulo 32. */
13587 *amountp &= 31;
13588 else if (*amountp != (*amountp & 31))
13589 {
13590 if (code == ASHIFT)
13591 mnem = "lsr";
13592 *amountp = 32;
13593 }
13594
13595 /* Shifts of 0 are no-ops. */
13596 if (*amountp == 0)
13597 return NULL;
13598 }
13599
13600 return mnem;
13601 }
13602
13603 /* Return the base-2 logarithm of POWER, which is expected to be an exact power of two. */
13604
13605 static HOST_WIDE_INT
13606 int_log2 (HOST_WIDE_INT power)
13607 {
13608 HOST_WIDE_INT shift = 0;
13609
13610 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13611 {
13612 gcc_assert (shift <= 31);
13613 shift++;
13614 }
13615
13616 return shift;
13617 }
13618
13619 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13620 because /bin/as is horribly restrictive. The judgement about
13621 whether or not each character is 'printable' (and can be output as
13622 is) or not (and must be printed with an octal escape) must be made
13623 with reference to the *host* character set -- the situation is
13624 similar to that discussed in the comments above pp_c_char in
13625 c-pretty-print.c. */
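/* For example (illustrative): the characters 'a', '"' and '\007' are
   emitted as .ascii "a\"\007" -- printable characters are copied through
   (escaping backslash and double quote), everything else becomes a
   three-digit octal escape.  */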
13626
13627 #define MAX_ASCII_LEN 51
13628
13629 void
13630 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13631 {
13632 int i;
13633 int len_so_far = 0;
13634
13635 fputs ("\t.ascii\t\"", stream);
13636
13637 for (i = 0; i < len; i++)
13638 {
13639 int c = p[i];
13640
13641 if (len_so_far >= MAX_ASCII_LEN)
13642 {
13643 fputs ("\"\n\t.ascii\t\"", stream);
13644 len_so_far = 0;
13645 }
13646
13647 if (ISPRINT (c))
13648 {
13649 if (c == '\\' || c == '\"')
13650 {
13651 putc ('\\', stream);
13652 len_so_far++;
13653 }
13654 putc (c, stream);
13655 len_so_far++;
13656 }
13657 else
13658 {
13659 fprintf (stream, "\\%03o", c);
13660 len_so_far += 4;
13661 }
13662 }
13663
13664 fputs ("\"\n", stream);
13665 }
13666 \f
13667 /* Compute the register save mask for registers 0 through 12
13668 inclusive. This code is used by arm_compute_save_reg_mask. */
13669
13670 static unsigned long
13671 arm_compute_save_reg0_reg12_mask (void)
13672 {
13673 unsigned long func_type = arm_current_func_type ();
13674 unsigned long save_reg_mask = 0;
13675 unsigned int reg;
13676
13677 if (IS_INTERRUPT (func_type))
13678 {
13679 unsigned int max_reg;
13680 /* Interrupt functions must not corrupt any registers,
13681 even call clobbered ones. If this is a leaf function
13682 we can just examine the registers used by the RTL, but
13683 otherwise we have to assume that whatever function is
13684 called might clobber anything, and so we have to save
13685 all the call-clobbered registers as well. */
13686 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13687 /* FIQ handlers have registers r8 - r12 banked, so
13688 we only need to check r0 - r7.  Normal ISRs only
13689 bank r13 and r14, so we must check up to r12.
13690 r13 is the stack pointer which is always preserved,
13691 so we do not need to consider it here. */
13692 max_reg = 7;
13693 else
13694 max_reg = 12;
13695
13696 for (reg = 0; reg <= max_reg; reg++)
13697 if (df_regs_ever_live_p (reg)
13698 || (! current_function_is_leaf && call_used_regs[reg]))
13699 save_reg_mask |= (1 << reg);
13700
13701 /* Also save the pic base register if necessary. */
13702 if (flag_pic
13703 && !TARGET_SINGLE_PIC_BASE
13704 && arm_pic_register != INVALID_REGNUM
13705 && crtl->uses_pic_offset_table)
13706 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13707 }
13708 else if (IS_VOLATILE (func_type))
13709 {
13710 /* For noreturn functions we historically omitted register saves
13711 altogether. However this really messes up debugging. As a
13712 compromise save just the frame pointers. Combined with the link
13713 register saved elsewhere this should be sufficient to get
13714 a backtrace. */
13715 if (frame_pointer_needed)
13716 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13717 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13718 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13719 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13720 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13721 }
13722 else
13723 {
13724 /* In the normal case we only need to save those registers
13725 which are call saved and which are used by this function. */
13726 for (reg = 0; reg <= 11; reg++)
13727 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13728 save_reg_mask |= (1 << reg);
13729
13730 /* Handle the frame pointer as a special case. */
13731 if (frame_pointer_needed)
13732 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13733
13734 /* If we aren't loading the PIC register,
13735 don't stack it even though it may be live. */
13736 if (flag_pic
13737 && !TARGET_SINGLE_PIC_BASE
13738 && arm_pic_register != INVALID_REGNUM
13739 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13740 || crtl->uses_pic_offset_table))
13741 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13742
13743 /* The prologue will copy SP into R0, so save it. */
13744 if (IS_STACKALIGN (func_type))
13745 save_reg_mask |= 1;
13746 }
13747
13748 /* Save registers so the exception handler can modify them. */
13749 if (crtl->calls_eh_return)
13750 {
13751 unsigned int i;
13752
13753 for (i = 0; ; i++)
13754 {
13755 reg = EH_RETURN_DATA_REGNO (i);
13756 if (reg == INVALID_REGNUM)
13757 break;
13758 save_reg_mask |= 1 << reg;
13759 }
13760 }
13761
13762 return save_reg_mask;
13763 }
13764
13765
13766 /* Compute the number of bytes used to store the static chain register on the
13767 stack, above the stack frame. We need to know this accurately to get the
13768 alignment of the rest of the stack frame correct. */
13769
13770 static int arm_compute_static_chain_stack_bytes (void)
13771 {
13772 unsigned long func_type = arm_current_func_type ();
13773 int static_chain_stack_bytes = 0;
13774
13775 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13776 && IS_NESTED (func_type)
13777 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13778 static_chain_stack_bytes = 4;
13779
13780 return static_chain_stack_bytes;
13781 }
13782
13783
13784 /* Compute a bit mask of which registers need to be
13785 saved on the stack for the current function.
13786 This is used by arm_get_frame_offsets, which may add extra registers. */
13787
13788 static unsigned long
13789 arm_compute_save_reg_mask (void)
13790 {
13791 unsigned int save_reg_mask = 0;
13792 unsigned long func_type = arm_current_func_type ();
13793 unsigned int reg;
13794
13795 if (IS_NAKED (func_type))
13796 /* This should never really happen. */
13797 return 0;
13798
13799 /* If we are creating a stack frame, then we must save the frame pointer,
13800 IP (which will hold the old stack pointer), LR and the PC. */
13801 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13802 save_reg_mask |=
13803 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13804 | (1 << IP_REGNUM)
13805 | (1 << LR_REGNUM)
13806 | (1 << PC_REGNUM);
13807
13808 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13809
13810 /* Decide if we need to save the link register.
13811 Interrupt routines have their own banked link register,
13812 so they never need to save it.
13813 Otherwise if we do not use the link register we do not need to save
13814 it. If we are pushing other registers onto the stack however, we
13815 can save an instruction in the epilogue by pushing the link register
13816 now and then popping it back into the PC. This incurs extra memory
13817 accesses though, so we only do it when optimizing for size, and only
13818 if we know that we will not need a fancy return sequence. */
13819 if (df_regs_ever_live_p (LR_REGNUM)
13820 || (save_reg_mask
13821 && optimize_size
13822 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13823 && !crtl->calls_eh_return))
13824 save_reg_mask |= 1 << LR_REGNUM;
13825
13826 if (cfun->machine->lr_save_eliminated)
13827 save_reg_mask &= ~ (1 << LR_REGNUM);
13828
13829 if (TARGET_REALLY_IWMMXT
13830 && ((bit_count (save_reg_mask)
13831 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13832 arm_compute_static_chain_stack_bytes ())
13833 ) % 2) != 0)
13834 {
13835 /* The total number of registers that are going to be pushed
13836 onto the stack is odd. We need to ensure that the stack
13837 is 64-bit aligned before we start to save iWMMXt registers,
13838 and also before we start to create locals. (A local variable
13839 might be a double or long long which we will load/store using
13840 an iWMMXt instruction). Therefore we need to push another
13841 ARM register, so that the stack will be 64-bit aligned. We
13842 try to avoid using the arg registers (r0 - r3) as they might be
13843 used to pass values in a tail call. */
13844 for (reg = 4; reg <= 12; reg++)
13845 if ((save_reg_mask & (1 << reg)) == 0)
13846 break;
13847
13848 if (reg <= 12)
13849 save_reg_mask |= (1 << reg);
13850 else
13851 {
13852 cfun->machine->sibcall_blocked = 1;
13853 save_reg_mask |= (1 << 3);
13854 }
13855 }
13856
13857 /* We may need to push an additional register for use initializing the
13858 PIC base register. */
13859 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13860 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13861 {
13862 reg = thumb_find_work_register (1 << 4);
13863 if (!call_used_regs[reg])
13864 save_reg_mask |= (1 << reg);
13865 }
13866
13867 return save_reg_mask;
13868 }
13869
13870
13871 /* Compute a bit mask of which registers need to be
13872 saved on the stack for the current function. */
13873 static unsigned long
13874 thumb1_compute_save_reg_mask (void)
13875 {
13876 unsigned long mask;
13877 unsigned reg;
13878
13879 mask = 0;
13880 for (reg = 0; reg < 12; reg ++)
13881 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13882 mask |= 1 << reg;
13883
13884 if (flag_pic
13885 && !TARGET_SINGLE_PIC_BASE
13886 && arm_pic_register != INVALID_REGNUM
13887 && crtl->uses_pic_offset_table)
13888 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13889
13890 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13891 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13892 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13893
13894 /* LR will also be pushed if any lo regs are pushed. */
13895 if (mask & 0xff || thumb_force_lr_save ())
13896 mask |= (1 << LR_REGNUM);
13897
13898 /* Make sure we have a low work register if we need one.
13899 We will need one if we are going to push a high register,
13900 but we are not currently intending to push a low register. */
13901 if ((mask & 0xff) == 0
13902 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13903 {
13904 /* Use thumb_find_work_register to choose which register
13905 we will use. If the register is live then we will
13906 have to push it. Use LAST_LO_REGNUM as our fallback
13907 choice for the register to select. */
13908 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13909 /* Make sure the register returned by thumb_find_work_register is
13910 not part of the return value. */
13911 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13912 reg = LAST_LO_REGNUM;
13913
13914 if (! call_used_regs[reg])
13915 mask |= 1 << reg;
13916 }
13917
13918 /* The 504 below is 8 bytes less than 512 because there are two possible
13919 alignment words.  We can't tell here if they will be present or not, so we
13920 have to play it safe and assume that they are. */
13921 if ((CALLER_INTERWORKING_SLOT_SIZE +
13922 ROUND_UP_WORD (get_frame_size ()) +
13923 crtl->outgoing_args_size) >= 504)
13924 {
13925 /* This is the same as the code in thumb1_expand_prologue() which
13926 determines which register to use for stack decrement. */
13927 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13928 if (mask & (1 << reg))
13929 break;
13930
13931 if (reg > LAST_LO_REGNUM)
13932 {
13933 /* Make sure we have a register available for stack decrement. */
13934 mask |= 1 << LAST_LO_REGNUM;
13935 }
13936 }
13937
13938 return mask;
13939 }
13940
13941
13942 /* Return the number of bytes required to save VFP registers. */
13943 static int
13944 arm_get_vfp_saved_size (void)
13945 {
13946 unsigned int regno;
13947 int count;
13948 int saved;
13949
13950 saved = 0;
13951 /* Space for saved VFP registers. */
13952 if (TARGET_HARD_FLOAT && TARGET_VFP)
13953 {
13954 count = 0;
13955 for (regno = FIRST_VFP_REGNUM;
13956 regno < LAST_VFP_REGNUM;
13957 regno += 2)
13958 {
13959 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13960 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13961 {
13962 if (count > 0)
13963 {
13964 /* Workaround ARM10 VFPr1 bug. */
13965 if (count == 2 && !arm_arch6)
13966 count++;
13967 saved += count * 8;
13968 }
13969 count = 0;
13970 }
13971 else
13972 count++;
13973 }
13974 if (count > 0)
13975 {
13976 if (count == 2 && !arm_arch6)
13977 count++;
13978 saved += count * 8;
13979 }
13980 }
13981 return saved;
13982 }
13983
13984
13985 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13986 everything bar the final return instruction. */
13987 const char *
13988 output_return_instruction (rtx operand, int really_return, int reverse)
13989 {
13990 char conditional[10];
13991 char instr[100];
13992 unsigned reg;
13993 unsigned long live_regs_mask;
13994 unsigned long func_type;
13995 arm_stack_offsets *offsets;
13996
13997 func_type = arm_current_func_type ();
13998
13999 if (IS_NAKED (func_type))
14000 return "";
14001
14002 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14003 {
14004 /* If this function was declared non-returning, and we have
14005 found a tail call, then we have to trust that the called
14006 function won't return. */
14007 if (really_return)
14008 {
14009 rtx ops[2];
14010
14011 /* Otherwise, trap an attempted return by aborting. */
14012 ops[0] = operand;
14013 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14014 : "abort");
14015 assemble_external_libcall (ops[1]);
14016 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14017 }
14018
14019 return "";
14020 }
14021
14022 gcc_assert (!cfun->calls_alloca || really_return);
14023
14024 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
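/* CONDITIONAL now holds "%?%d0" (or "%?%D0" when REVERSE): %d0/%D0
   expand to the condition code of OPERAND, or its inverse, when the
   templates below are output.  */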
14025
14026 cfun->machine->return_used_this_function = 1;
14027
14028 offsets = arm_get_frame_offsets ();
14029 live_regs_mask = offsets->saved_regs_mask;
14030
14031 if (live_regs_mask)
14032 {
14033 const char * return_reg;
14034
14035 /* If we do not have any special requirements for function exit
14036 (e.g. interworking) then we can load the return address
14037 directly into the PC. Otherwise we must load it into LR. */
14038 if (really_return
14039 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14040 return_reg = reg_names[PC_REGNUM];
14041 else
14042 return_reg = reg_names[LR_REGNUM];
14043
14044 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14045 {
14046 /* There are three possible reasons for the IP register
14047 being saved. 1) a stack frame was created, in which case
14048 IP contains the old stack pointer, or 2) an ISR routine
14049 corrupted it, or 3) it was saved to align the stack on
14050 iWMMXt. In case 1, restore IP into SP, otherwise just
14051 restore IP. */
14052 if (frame_pointer_needed)
14053 {
14054 live_regs_mask &= ~ (1 << IP_REGNUM);
14055 live_regs_mask |= (1 << SP_REGNUM);
14056 }
14057 else
14058 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14059 }
14060
14061 /* On some ARM architectures it is faster to use LDR rather than
14062 LDM to load a single register. On other architectures, the
14063 cost is the same. In 26 bit mode, or for exception handlers,
14064 we have to use LDM to load the PC so that the CPSR is also
14065 restored. */
14066 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14067 if (live_regs_mask == (1U << reg))
14068 break;
14069
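/* At this point REG <= LAST_ARM_REGNUM exactly when a single register
   is to be restored.  */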
14070 if (reg <= LAST_ARM_REGNUM
14071 && (reg != LR_REGNUM
14072 || ! really_return
14073 || ! IS_INTERRUPT (func_type)))
14074 {
14075 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14076 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14077 }
14078 else
14079 {
14080 char *p;
14081 int first = 1;
14082
14083 /* Generate the load multiple instruction to restore the
14084 registers. Note we can get here, even if
14085 frame_pointer_needed is true, but only if sp already
14086 points to the base of the saved core registers. */
14087 if (live_regs_mask & (1 << SP_REGNUM))
14088 {
14089 unsigned HOST_WIDE_INT stack_adjust;
14090
14091 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14092 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14093
14094 if (stack_adjust && arm_arch5 && TARGET_ARM)
14095 if (TARGET_UNIFIED_ASM)
14096 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14097 else
14098 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14099 else
14100 {
14101 /* If we can't use ldmib (SA110 bug),
14102 then try to pop r3 instead. */
14103 if (stack_adjust)
14104 live_regs_mask |= 1 << 3;
14105
14106 if (TARGET_UNIFIED_ASM)
14107 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14108 else
14109 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14110 }
14111 }
14112 else
14113 if (TARGET_UNIFIED_ASM)
14114 sprintf (instr, "pop%s\t{", conditional);
14115 else
14116 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14117
14118 p = instr + strlen (instr);
14119
14120 for (reg = 0; reg <= SP_REGNUM; reg++)
14121 if (live_regs_mask & (1 << reg))
14122 {
14123 int l = strlen (reg_names[reg]);
14124
14125 if (first)
14126 first = 0;
14127 else
14128 {
14129 memcpy (p, ", ", 2);
14130 p += 2;
14131 }
14132
14133 memcpy (p, "%|", 2);
14134 memcpy (p + 2, reg_names[reg], l);
14135 p += l + 2;
14136 }
14137
14138 if (live_regs_mask & (1 << LR_REGNUM))
14139 {
14140 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14141 /* If returning from an interrupt, restore the CPSR. */
14142 if (IS_INTERRUPT (func_type))
14143 strcat (p, "^");
14144 }
14145 else
14146 strcpy (p, "}");
14147 }
14148
14149 output_asm_insn (instr, & operand);
14150
14151 /* See if we need to generate an extra instruction to
14152 perform the actual function return. */
14153 if (really_return
14154 && func_type != ARM_FT_INTERWORKED
14155 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14156 {
14157 /* The return has already been handled
14158 by loading the LR into the PC. */
14159 really_return = 0;
14160 }
14161 }
14162
14163 if (really_return)
14164 {
14165 switch ((int) ARM_FUNC_TYPE (func_type))
14166 {
14167 case ARM_FT_ISR:
14168 case ARM_FT_FIQ:
14169 /* ??? This is wrong for unified assembly syntax. */
14170 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14171 break;
14172
14173 case ARM_FT_INTERWORKED:
14174 sprintf (instr, "bx%s\t%%|lr", conditional);
14175 break;
14176
14177 case ARM_FT_EXCEPTION:
14178 /* ??? This is wrong for unified assembly syntax. */
14179 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14180 break;
14181
14182 default:
14183 /* Use bx if it's available. */
14184 if (arm_arch5 || arm_arch4t)
14185 sprintf (instr, "bx%s\t%%|lr", conditional);
14186 else
14187 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14188 break;
14189 }
14190
14191 output_asm_insn (instr, & operand);
14192 }
14193
14194 return "";
14195 }
14196
14197 /* Write the function name into the code section, directly preceding
14198 the function prologue.
14199
14200 Code will be output similar to this:
14201 t0
14202 .ascii "arm_poke_function_name", 0
14203 .align
14204 t1
14205 .word 0xff000000 + (t1 - t0)
14206 arm_poke_function_name
14207 mov ip, sp
14208 stmfd sp!, {fp, ip, lr, pc}
14209 sub fp, ip, #4
14210
14211 When performing a stack backtrace, code can inspect the value
14212 of 'pc' stored at 'fp' + 0. If the trace function then looks
14213 at location pc - 12 and the top 8 bits are set, then we know
14214 that there is a function name embedded immediately preceding this
14215 location, whose length is given by ((pc[-3]) & ~0xff000000).
14216
14217 We assume that pc is declared as a pointer to an unsigned long.
14218
14219 It is of no benefit to output the function name if we are assembling
14220 a leaf function. These function types will not contain a stack
14221 backtrace structure, therefore it is not possible to determine the
14222 function name. */
14223 void
14224 arm_poke_function_name (FILE *stream, const char *name)
14225 {
14226 unsigned long alignlength;
14227 unsigned long length;
14228 rtx x;
14229
14230 length = strlen (name) + 1;
14231 alignlength = ROUND_UP_WORD (length);
14232
14233 ASM_OUTPUT_ASCII (stream, name, length);
14234 ASM_OUTPUT_ALIGN (stream, 2);
14235 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14236 assemble_aligned_integer (UNITS_PER_WORD, x);
14237 }
14238
14239 /* Place some comments into the assembler stream
14240 describing the current function. */
14241 static void
14242 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14243 {
14244 unsigned long func_type;
14245
14246 if (TARGET_THUMB1)
14247 {
14248 thumb1_output_function_prologue (f, frame_size);
14249 return;
14250 }
14251
14252 /* Sanity check. */
14253 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14254
14255 func_type = arm_current_func_type ();
14256
14257 switch ((int) ARM_FUNC_TYPE (func_type))
14258 {
14259 default:
14260 case ARM_FT_NORMAL:
14261 break;
14262 case ARM_FT_INTERWORKED:
14263 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14264 break;
14265 case ARM_FT_ISR:
14266 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14267 break;
14268 case ARM_FT_FIQ:
14269 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14270 break;
14271 case ARM_FT_EXCEPTION:
14272 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14273 break;
14274 }
14275
14276 if (IS_NAKED (func_type))
14277 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14278
14279 if (IS_VOLATILE (func_type))
14280 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14281
14282 if (IS_NESTED (func_type))
14283 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14284 if (IS_STACKALIGN (func_type))
14285 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14286
14287 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14288 crtl->args.size,
14289 crtl->args.pretend_args_size, frame_size);
14290
14291 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14292 frame_pointer_needed,
14293 cfun->machine->uses_anonymous_args);
14294
14295 if (cfun->machine->lr_save_eliminated)
14296 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14297
14298 if (crtl->calls_eh_return)
14299 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14300
14301 }
14302
14303 const char *
14304 arm_output_epilogue (rtx sibling)
14305 {
14306 int reg;
14307 unsigned long saved_regs_mask;
14308 unsigned long func_type;
14309 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14310 frame that is $fp + 4 for a non-variadic function. */
14311 int floats_offset = 0;
14312 rtx operands[3];
14313 FILE * f = asm_out_file;
14314 unsigned int lrm_count = 0;
14315 int really_return = (sibling == NULL);
14316 int start_reg;
14317 arm_stack_offsets *offsets;
14318
14319 /* If we have already generated the return instruction
14320 then it is futile to generate anything else. */
14321 if (use_return_insn (FALSE, sibling)
14322 && (cfun->machine->return_used_this_function != 0))
14323 return "";
14324
14325 func_type = arm_current_func_type ();
14326
14327 if (IS_NAKED (func_type))
14328 /* Naked functions don't have epilogues. */
14329 return "";
14330
14331 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14332 {
14333 rtx op;
14334
14335 /* A volatile function should never return. Call abort. */
14336 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14337 assemble_external_libcall (op);
14338 output_asm_insn ("bl\t%a0", &op);
14339
14340 return "";
14341 }
14342
14343 /* If we are throwing an exception, then we really must be doing a
14344 return, so we can't tail-call. */
14345 gcc_assert (!crtl->calls_eh_return || really_return);
14346
14347 offsets = arm_get_frame_offsets ();
14348 saved_regs_mask = offsets->saved_regs_mask;
14349
14350 if (TARGET_IWMMXT)
14351 lrm_count = bit_count (saved_regs_mask);
14352
14353 floats_offset = offsets->saved_args;
14354 /* Compute how far away the floats will be. */
14355 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14356 if (saved_regs_mask & (1 << reg))
14357 floats_offset += 4;
14358
14359 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14360 {
14361 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14362 int vfp_offset = offsets->frame;
14363
14364 if (TARGET_FPA_EMU2)
14365 {
14366 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14367 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14368 {
14369 floats_offset += 12;
14370 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14371 reg, FP_REGNUM, floats_offset - vfp_offset);
14372 }
14373 }
14374 else
14375 {
14376 start_reg = LAST_FPA_REGNUM;
14377
14378 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14379 {
14380 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14381 {
14382 floats_offset += 12;
14383
14384 /* We can't unstack more than four registers at once. */
14385 if (start_reg - reg == 3)
14386 {
14387 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14388 reg, FP_REGNUM, floats_offset - vfp_offset);
14389 start_reg = reg - 1;
14390 }
14391 }
14392 else
14393 {
14394 if (reg != start_reg)
14395 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14396 reg + 1, start_reg - reg,
14397 FP_REGNUM, floats_offset - vfp_offset);
14398 start_reg = reg - 1;
14399 }
14400 }
14401
14402 /* Just in case the last register checked also needs unstacking. */
14403 if (reg != start_reg)
14404 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14405 reg + 1, start_reg - reg,
14406 FP_REGNUM, floats_offset - vfp_offset);
14407 }
14408
14409 if (TARGET_HARD_FLOAT && TARGET_VFP)
14410 {
14411 int saved_size;
14412
14413 /* The fldmd insns do not have base+offset addressing
14414 modes, so we use IP to hold the address. */
14415 saved_size = arm_get_vfp_saved_size ();
14416
14417 if (saved_size > 0)
14418 {
14419 floats_offset += saved_size;
14420 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14421 FP_REGNUM, floats_offset - vfp_offset);
14422 }
14423 start_reg = FIRST_VFP_REGNUM;
14424 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14425 {
14426 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14427 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14428 {
14429 if (start_reg != reg)
14430 vfp_output_fldmd (f, IP_REGNUM,
14431 (start_reg - FIRST_VFP_REGNUM) / 2,
14432 (reg - start_reg) / 2);
14433 start_reg = reg + 2;
14434 }
14435 }
14436 if (start_reg != reg)
14437 vfp_output_fldmd (f, IP_REGNUM,
14438 (start_reg - FIRST_VFP_REGNUM) / 2,
14439 (reg - start_reg) / 2);
14440 }
14441
14442 if (TARGET_IWMMXT)
14443 {
14444 /* The frame pointer is guaranteed to be non-double-word aligned.
14445 This is because it is set to (old_stack_pointer - 4) and the
14446 old_stack_pointer was double word aligned. Thus the offset to
14447 the iWMMXt registers to be loaded must also be non-double-word
14448 sized, so that the resultant address *is* double-word aligned.
14449 We can ignore floats_offset since that was already included in
14450 the live_regs_mask. */
14451 lrm_count += (lrm_count % 2 ? 2 : 1);
14452
14453 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14454 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14455 {
14456 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14457 reg, FP_REGNUM, lrm_count * 4);
14458 lrm_count += 2;
14459 }
14460 }
14461
14462 /* saved_regs_mask should contain the IP, which at the time of stack
14463 frame generation actually contains the old stack pointer. So a
14464 quick way to unwind the stack is just pop the IP register directly
14465 into the stack pointer. */
14466 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14467 saved_regs_mask &= ~ (1 << IP_REGNUM);
14468 saved_regs_mask |= (1 << SP_REGNUM);
14469
14470 /* There are two registers left in saved_regs_mask - LR and PC. We
14471 only need to restore the LR register (the return address), but to
14472 save time we can load it directly into the PC, unless we need a
14473 special function exit sequence, or we are not really returning. */
14474 if (really_return
14475 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14476 && !crtl->calls_eh_return)
14477 /* Delete the LR from the register mask, so that the LR on
14478 the stack is loaded into the PC in the register mask. */
14479 saved_regs_mask &= ~ (1 << LR_REGNUM);
14480 else
14481 saved_regs_mask &= ~ (1 << PC_REGNUM);
14482
14483 /* We must use SP as the base register, because SP is one of the
14484 registers being restored. If an interrupt or page fault
14485 happens in the ldm instruction, the SP might or might not
14486 have been restored. That would be bad, as then SP will no
14487 longer indicate the safe area of stack, and we can get stack
14488 corruption. Using SP as the base register means that it will
14489 be reset correctly to the original value, should an interrupt
14490 occur. If the stack pointer already points at the right
14491 place, then omit the subtraction. */
14492 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14493 || cfun->calls_alloca)
14494 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14495 4 * bit_count (saved_regs_mask));
14496 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14497
14498 if (IS_INTERRUPT (func_type))
14499 /* Interrupt handlers will have pushed the
14500 IP onto the stack, so restore it now. */
14501 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14502 }
14503 else
14504 {
14505 /* This branch is executed for ARM mode (non-apcs frames) and
14506 Thumb-2 mode. Frame layout is essentially the same for those
14507 cases, except that in ARM mode the frame pointer points to the
14508 first saved register, while in Thumb-2 mode the frame pointer points
14509 to the last saved register.
14510
14511 It is possible to make the frame pointer point to the last saved
14512 register in both cases, and remove some conditionals below.
14513 That means that fp setup in prologue would be just "mov fp, sp"
14514 and sp restore in epilogue would be just "mov sp, fp", whereas
14515 now we have to use add/sub in those cases. However, the value
14516 of that would be marginal, as both mov and add/sub are 32-bit
14517 in ARM mode, and it would require extra conditionals
14518 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14519 (where the frame pointer is required to point at the first register)
14520 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
14521 until a real need arises. */
14522 unsigned HOST_WIDE_INT amount;
14523 int rfe;
14524 /* Restore stack pointer if necessary. */
14525 if (TARGET_ARM && frame_pointer_needed)
14526 {
14527 operands[0] = stack_pointer_rtx;
14528 operands[1] = hard_frame_pointer_rtx;
14529
14530 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14531 output_add_immediate (operands);
14532 }
14533 else
14534 {
14535 if (frame_pointer_needed)
14536 {
14537 /* For Thumb-2 restore sp from the frame pointer.
14538 Operand restrictions mean we have to increment FP, then copy
14539 to SP. */
14540 amount = offsets->locals_base - offsets->saved_regs;
14541 operands[0] = hard_frame_pointer_rtx;
14542 }
14543 else
14544 {
14545 unsigned long count;
14546 operands[0] = stack_pointer_rtx;
14547 amount = offsets->outgoing_args - offsets->saved_regs;
14548 /* pop call clobbered registers if it avoids a
14549 separate stack adjustment. */
14550 count = offsets->saved_regs - offsets->saved_args;
14551 if (optimize_size
14552 && count != 0
14553 && !crtl->calls_eh_return
14554 && bit_count (saved_regs_mask) * 4 == count
14555 && !IS_INTERRUPT (func_type)
14556 && !crtl->tail_call_emit)
14557 {
14558 unsigned long mask;
14559 /* Preserve return values, of any size. */
14560 mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
14561 mask ^= 0xf;
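/* E.g. an 8-byte return value (r0, r1) leaves mask == 0xc here, so only
   r2 and r3 are candidate scratch registers; with no return value all
   of r0-r3 are candidates.  */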
14562 mask &= ~saved_regs_mask;
14563 reg = 0;
14564 while (bit_count (mask) * 4 > amount)
14565 {
14566 while ((mask & (1 << reg)) == 0)
14567 reg++;
14568 mask &= ~(1 << reg);
14569 }
14570 if (bit_count (mask) * 4 == amount) {
14571 amount = 0;
14572 saved_regs_mask |= mask;
14573 }
14574 }
14575 }
14576
14577 if (amount)
14578 {
14579 operands[1] = operands[0];
14580 operands[2] = GEN_INT (amount);
14581 output_add_immediate (operands);
14582 }
14583 if (frame_pointer_needed)
14584 asm_fprintf (f, "\tmov\t%r, %r\n",
14585 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14586 }
14587
14588 if (TARGET_FPA_EMU2)
14589 {
14590 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14591 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14592 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14593 reg, SP_REGNUM);
14594 }
14595 else
14596 {
14597 start_reg = FIRST_FPA_REGNUM;
14598
14599 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14600 {
14601 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14602 {
14603 if (reg - start_reg == 3)
14604 {
14605 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14606 start_reg, SP_REGNUM);
14607 start_reg = reg + 1;
14608 }
14609 }
14610 else
14611 {
14612 if (reg != start_reg)
14613 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14614 start_reg, reg - start_reg,
14615 SP_REGNUM);
14616
14617 start_reg = reg + 1;
14618 }
14619 }
14620
14621 /* Just in case the last register checked also needs unstacking. */
14622 if (reg != start_reg)
14623 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14624 start_reg, reg - start_reg, SP_REGNUM);
14625 }
14626
14627 if (TARGET_HARD_FLOAT && TARGET_VFP)
14628 {
14629 int end_reg = LAST_VFP_REGNUM + 1;
14630
14631 /* Scan the registers in reverse order. We need to match
14632 any groupings made in the prologue and generate matching
14633 pop operations. */
14634 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14635 {
14636 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14637 && (!df_regs_ever_live_p (reg + 1)
14638 || call_used_regs[reg + 1]))
14639 {
14640 if (end_reg > reg + 2)
14641 vfp_output_fldmd (f, SP_REGNUM,
14642 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14643 (end_reg - (reg + 2)) / 2);
14644 end_reg = reg;
14645 }
14646 }
14647 if (end_reg > reg + 2)
14648 vfp_output_fldmd (f, SP_REGNUM, 0,
14649 (end_reg - (reg + 2)) / 2);
14650 }
14651
14652 if (TARGET_IWMMXT)
14653 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14654 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14655 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14656
14657 /* If we can, restore the LR into the PC. */
14658 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14659 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14660 && !IS_STACKALIGN (func_type)
14661 && really_return
14662 && crtl->args.pretend_args_size == 0
14663 && saved_regs_mask & (1 << LR_REGNUM)
14664 && !crtl->calls_eh_return)
14665 {
14666 saved_regs_mask &= ~ (1 << LR_REGNUM);
14667 saved_regs_mask |= (1 << PC_REGNUM);
14668 rfe = IS_INTERRUPT (func_type);
14669 }
14670 else
14671 rfe = 0;
14672
14673 /* Load the registers off the stack. If we only have one register
14674 to load use the LDR instruction - it is faster. For Thumb-2
14675 always use pop and the assembler will pick the best instruction.  */
14676 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14677 && !IS_INTERRUPT (func_type))
14678 {
14679 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14680 }
14681 else if (saved_regs_mask)
14682 {
14683 if (saved_regs_mask & (1 << SP_REGNUM))
14684 /* Note - write back to the stack register is not enabled
14685 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14686 in the list of registers and if we add writeback the
14687 instruction becomes UNPREDICTABLE. */
14688 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14689 rfe);
14690 else if (TARGET_ARM)
14691 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14692 rfe);
14693 else
14694 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14695 }
14696
14697 if (crtl->args.pretend_args_size)
14698 {
14699 /* Unwind the pre-pushed regs. */
14700 operands[0] = operands[1] = stack_pointer_rtx;
14701 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14702 output_add_immediate (operands);
14703 }
14704 }
14705
14706 /* We may have already restored PC directly from the stack. */
14707 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14708 return "";
14709
14710 /* Stack adjustment for exception handler. */
14711 if (crtl->calls_eh_return)
14712 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14713 ARM_EH_STACKADJ_REGNUM);
14714
14715 /* Generate the return instruction. */
14716 switch ((int) ARM_FUNC_TYPE (func_type))
14717 {
14718 case ARM_FT_ISR:
14719 case ARM_FT_FIQ:
14720 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14721 break;
14722
14723 case ARM_FT_EXCEPTION:
14724 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14725 break;
14726
14727 case ARM_FT_INTERWORKED:
14728 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14729 break;
14730
14731 default:
14732 if (IS_STACKALIGN (func_type))
14733 {
14734 /* See comment in arm_expand_prologue. */
14735 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14736 }
14737 if (arm_arch5 || arm_arch4t)
14738 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14739 else
14740 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14741 break;
14742 }
14743
14744 return "";
14745 }
14746
14747 static void
14748 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14749 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14750 {
14751 arm_stack_offsets *offsets;
14752
14753 if (TARGET_THUMB1)
14754 {
14755 int regno;
14756
14757 /* Emit any call-via-reg trampolines that are needed for v4t support
14758 of call_reg and call_value_reg type insns. */
14759 for (regno = 0; regno < LR_REGNUM; regno++)
14760 {
14761 rtx label = cfun->machine->call_via[regno];
14762
14763 if (label != NULL)
14764 {
14765 switch_to_section (function_section (current_function_decl));
14766 targetm.asm_out.internal_label (asm_out_file, "L",
14767 CODE_LABEL_NUMBER (label));
14768 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14769 }
14770 }
14771
14772 /* ??? Probably not safe to set this here, since it assumes that a
14773 function will be emitted as assembly immediately after we generate
14774 RTL for it. This does not happen for inline functions. */
14775 cfun->machine->return_used_this_function = 0;
14776 }
14777 else /* TARGET_32BIT */
14778 {
14779 /* We need to take into account any stack-frame rounding. */
14780 offsets = arm_get_frame_offsets ();
14781
14782 gcc_assert (!use_return_insn (FALSE, NULL)
14783 || (cfun->machine->return_used_this_function != 0)
14784 || offsets->saved_regs == offsets->outgoing_args
14785 || frame_pointer_needed);
14786
14787 /* Reset the ARM-specific per-function variables. */
14788 after_arm_reorg = 0;
14789 }
14790 }
14791
14792 /* Generate and emit an insn that we will recognize as a push_multi.
14793 Unfortunately, since this insn does not reflect very well the actual
14794 semantics of the operation, we need to annotate the insn for the benefit
14795 of DWARF2 frame unwind information. */
14796 static rtx
14797 emit_multi_reg_push (unsigned long mask)
14798 {
14799 int num_regs = 0;
14800 int num_dwarf_regs;
14801 int i, j;
14802 rtx par;
14803 rtx dwarf;
14804 int dwarf_par_index;
14805 rtx tmp, reg;
14806
14807 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14808 if (mask & (1 << i))
14809 num_regs++;
14810
14811 gcc_assert (num_regs && num_regs <= 16);
14812
14813 /* We don't record the PC in the dwarf frame information. */
14814 num_dwarf_regs = num_regs;
14815 if (mask & (1 << PC_REGNUM))
14816 num_dwarf_regs--;
14817
14818 /* For the body of the insn we are going to generate an UNSPEC in
14819 parallel with several USEs. This allows the insn to be recognized
14820 by the push_multi pattern in the arm.md file.
14821
14822 The body of the insn looks something like this:
14823
14824 (parallel [
14825 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14826 (const_int:SI <num>)))
14827 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14828 (use (reg:SI XX))
14829 (use (reg:SI YY))
14830 ...
14831 ])
14832
14833 For the frame note however, we try to be more explicit and actually
14834 show each register being stored into the stack frame, plus a (single)
14835 decrement of the stack pointer. We do it this way in order to be
14836 friendly to the stack unwinding code, which only wants to see a single
14837 stack decrement per instruction. The RTL we generate for the note looks
14838 something like this:
14839
14840 (sequence [
14841 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14842 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14843 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14844 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14845 ...
14846 ])
14847
14848 FIXME: In an ideal world the PRE_MODIFY would not exist and
14849 instead we'd have a parallel expression detailing all
14850 the stores to the various memory addresses so that debug
14851 information is more up-to-date. Remember however while writing
14852 this to take care of the constraints with the push instruction.
14853
14854 Note also that this has to be taken care of for the VFP registers.
14855
14856 For more see PR43399. */
14857
14858 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14859 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14860 dwarf_par_index = 1;
14861
14862 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14863 {
14864 if (mask & (1 << i))
14865 {
14866 reg = gen_rtx_REG (SImode, i);
14867
14868 XVECEXP (par, 0, 0)
14869 = gen_rtx_SET (VOIDmode,
14870 gen_frame_mem
14871 (BLKmode,
14872 gen_rtx_PRE_MODIFY (Pmode,
14873 stack_pointer_rtx,
14874 plus_constant
14875 (stack_pointer_rtx,
14876 -4 * num_regs))
14877 ),
14878 gen_rtx_UNSPEC (BLKmode,
14879 gen_rtvec (1, reg),
14880 UNSPEC_PUSH_MULT));
14881
14882 if (i != PC_REGNUM)
14883 {
14884 tmp = gen_rtx_SET (VOIDmode,
14885 gen_frame_mem (SImode, stack_pointer_rtx),
14886 reg);
14887 RTX_FRAME_RELATED_P (tmp) = 1;
14888 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14889 dwarf_par_index++;
14890 }
14891
14892 break;
14893 }
14894 }
14895
14896 for (j = 1, i++; j < num_regs; i++)
14897 {
14898 if (mask & (1 << i))
14899 {
14900 reg = gen_rtx_REG (SImode, i);
14901
14902 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14903
14904 if (i != PC_REGNUM)
14905 {
14906 tmp
14907 = gen_rtx_SET (VOIDmode,
14908 gen_frame_mem
14909 (SImode,
14910 plus_constant (stack_pointer_rtx,
14911 4 * j)),
14912 reg);
14913 RTX_FRAME_RELATED_P (tmp) = 1;
14914 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14915 }
14916
14917 j++;
14918 }
14919 }
14920
14921 par = emit_insn (par);
14922
14923 tmp = gen_rtx_SET (VOIDmode,
14924 stack_pointer_rtx,
14925 plus_constant (stack_pointer_rtx, -4 * num_regs));
14926 RTX_FRAME_RELATED_P (tmp) = 1;
14927 XVECEXP (dwarf, 0, 0) = tmp;
14928
14929 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14930
14931 return par;
14932 }
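
/* Sketch of what emit_multi_reg_push builds for, say, a MASK covering
   {r4, r5, lr} (hypothetical example): the insn body is a PARALLEL whose
   first element stores through (pre_modify sp, sp - 12) with the
   UNSPEC_PUSH_MULT marker, followed by USEs of r5 and lr; the attached
   REG_FRAME_RELATED_EXPR note instead lists a single 12-byte SP decrement
   plus individual stores of r4, r5 and lr at sp, sp+4 and sp+8, which is
   the form the unwinder wants to see.  */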
14933
14934 /* Calculate the size of the return value that is passed in registers. */
14935 static unsigned
14936 arm_size_return_regs (void)
14937 {
14938 enum machine_mode mode;
14939
14940 if (crtl->return_rtx != 0)
14941 mode = GET_MODE (crtl->return_rtx);
14942 else
14943 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14944
14945 return GET_MODE_SIZE (mode);
14946 }
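
/* For example (illustrative): a function returning "long long" has a DImode
   return rtx, so this reports GET_MODE_SIZE (DImode) == 8 bytes; a function
   returning "int" reports 4.  */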
14947
14948 static rtx
14949 emit_sfm (int base_reg, int count)
14950 {
14951 rtx par;
14952 rtx dwarf;
14953 rtx tmp, reg;
14954 int i;
14955
14956 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14957 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14958
14959 reg = gen_rtx_REG (XFmode, base_reg++);
14960
14961 XVECEXP (par, 0, 0)
14962 = gen_rtx_SET (VOIDmode,
14963 gen_frame_mem
14964 (BLKmode,
14965 gen_rtx_PRE_MODIFY (Pmode,
14966 stack_pointer_rtx,
14967 plus_constant
14968 (stack_pointer_rtx,
14969 -12 * count))
14970 ),
14971 gen_rtx_UNSPEC (BLKmode,
14972 gen_rtvec (1, reg),
14973 UNSPEC_PUSH_MULT));
14974 tmp = gen_rtx_SET (VOIDmode,
14975 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14976 RTX_FRAME_RELATED_P (tmp) = 1;
14977 XVECEXP (dwarf, 0, 1) = tmp;
14978
14979 for (i = 1; i < count; i++)
14980 {
14981 reg = gen_rtx_REG (XFmode, base_reg++);
14982 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14983
14984 tmp = gen_rtx_SET (VOIDmode,
14985 gen_frame_mem (XFmode,
14986 plus_constant (stack_pointer_rtx,
14987 i * 12)),
14988 reg);
14989 RTX_FRAME_RELATED_P (tmp) = 1;
14990 XVECEXP (dwarf, 0, i + 1) = tmp;
14991 }
14992
14993 tmp = gen_rtx_SET (VOIDmode,
14994 stack_pointer_rtx,
14995 plus_constant (stack_pointer_rtx, -12 * count));
14996
14997 RTX_FRAME_RELATED_P (tmp) = 1;
14998 XVECEXP (dwarf, 0, 0) = tmp;
14999
15000 par = emit_insn (par);
15001 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15002
15003 return par;
15004 }
15005
15006
15007 /* Return true if the current function needs to save/restore LR. */
15008
15009 static bool
15010 thumb_force_lr_save (void)
15011 {
15012 return !cfun->machine->lr_save_eliminated
15013 && (!leaf_function_p ()
15014 || thumb_far_jump_used_p ()
15015 || df_regs_ever_live_p (LR_REGNUM));
15016 }
15017
15018
15019 /* Compute the distance from register FROM to register TO.
15020 These can be the arg pointer (26), the soft frame pointer (25),
15021 the stack pointer (13) or the hard frame pointer (11).
15022 In thumb mode r7 is used as the soft frame pointer, if needed.
15023 Typical stack layout looks like this:
15024
15025 old stack pointer -> | |
15026 ----
15027 | | \
15028 | | saved arguments for
15029 | | vararg functions
15030 | | /
15031 --
15032 hard FP & arg pointer -> | | \
15033 | | stack
15034 | | frame
15035 | | /
15036 --
15037 | | \
15038 | | call saved
15039 | | registers
15040 soft frame pointer -> | | /
15041 --
15042 | | \
15043 | | local
15044 | | variables
15045 locals base pointer -> | | /
15046 --
15047 | | \
15048 | | outgoing
15049 | | arguments
15050 current stack pointer -> | | /
15051 --
15052
15053 For a given function some or all of these stack components
15054 may not be needed, giving rise to the possibility of
15055 eliminating some of the registers.
15056
15057 The values returned by this function must reflect the behavior
15058 of arm_expand_prologue() and arm_compute_save_reg_mask().
15059
15060 The sign of the number returned reflects the direction of stack
15061 growth, so the values are positive for all eliminations except
15062 from the soft frame pointer to the hard frame pointer.
15063
15064 SFP may point just inside the local variables block to ensure correct
15065 alignment. */
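
/* A purely illustrative example of the layout above (numbers invented for
   the sake of the sketch, assuming no static-chain slot, a zero
   CALLER_INTERWORKING_SLOT_SIZE and no doubleword padding): a non-leaf ARM
   function with no pretend args, {r4, r5, fp, lr} saved (16 bytes), 24 bytes
   of locals and 8 bytes of outgoing arguments ends up with saved_args = 0,
   saved_regs = 16, soft_frame = 16, locals_base = 40 and
   outgoing_args = 48.  */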
15066
15067
15068 /* Calculate stack offsets. These are used to calculate register elimination
15069 offsets and in prologue/epilogue code. Also calculates which registers
15070 should be saved. */
15071
15072 static arm_stack_offsets *
15073 arm_get_frame_offsets (void)
15074 {
15075 struct arm_stack_offsets *offsets;
15076 unsigned long func_type;
15077 int leaf;
15078 int saved;
15079 int core_saved;
15080 HOST_WIDE_INT frame_size;
15081 int i;
15082
15083 offsets = &cfun->machine->stack_offsets;
15084
15085 /* We need to know if we are a leaf function. Unfortunately, it
15086 is possible to be called after start_sequence has been called,
15087 which causes get_insns to return the insns for the sequence,
15088 not the function, which will cause leaf_function_p to return
15089 the incorrect result.
15090
15091 However, we do not need to know about leaf functions once reload
15092 has completed: the frame size cannot be changed after that time,
15093 so we can safely use the cached value. */
15094
15095 if (reload_completed)
15096 return offsets;
15097
15098 /* Initially this is the size of the local variables. It will be translated
15099 into an offset once we have determined the size of preceding data. */
15100 frame_size = ROUND_UP_WORD (get_frame_size ());
15101
15102 leaf = leaf_function_p ();
15103
15104 /* Space for variadic functions. */
15105 offsets->saved_args = crtl->args.pretend_args_size;
15106
15107 /* In Thumb mode this is incorrect, but never used. */
15108 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15109 arm_compute_static_chain_stack_bytes();
15110
15111 if (TARGET_32BIT)
15112 {
15113 unsigned int regno;
15114
15115 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15116 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15117 saved = core_saved;
15118
15119 /* We know that SP will be doubleword aligned on entry, and we must
15120 preserve that condition at any subroutine call. We also require the
15121 soft frame pointer to be doubleword aligned. */
15122
15123 if (TARGET_REALLY_IWMMXT)
15124 {
15125 /* Check for the call-saved iWMMXt registers. */
15126 for (regno = FIRST_IWMMXT_REGNUM;
15127 regno <= LAST_IWMMXT_REGNUM;
15128 regno++)
15129 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15130 saved += 8;
15131 }
15132
15133 func_type = arm_current_func_type ();
15134 if (! IS_VOLATILE (func_type))
15135 {
15136 /* Space for saved FPA registers. */
15137 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15138 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15139 saved += 12;
15140
15141 /* Space for saved VFP registers. */
15142 if (TARGET_HARD_FLOAT && TARGET_VFP)
15143 saved += arm_get_vfp_saved_size ();
15144 }
15145 }
15146 else /* TARGET_THUMB1 */
15147 {
15148 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15149 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15150 saved = core_saved;
15151 if (TARGET_BACKTRACE)
15152 saved += 16;
15153 }
15154
15155 /* Saved registers include the stack frame. */
15156 offsets->saved_regs = offsets->saved_args + saved +
15157 arm_compute_static_chain_stack_bytes();
15158 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15159 /* A leaf function does not need any stack alignment if it has nothing
15160 on the stack. */
15161 if (leaf && frame_size == 0)
15162 {
15163 offsets->outgoing_args = offsets->soft_frame;
15164 offsets->locals_base = offsets->soft_frame;
15165 return offsets;
15166 }
15167
15168 /* Ensure SFP has the correct alignment. */
15169 if (ARM_DOUBLEWORD_ALIGN
15170 && (offsets->soft_frame & 7))
15171 {
15172 offsets->soft_frame += 4;
15173 /* Try to align stack by pushing an extra reg. Don't bother doing this
15174 when there is a stack frame as the alignment will be rolled into
15175 the normal stack adjustment. */
15176 if (frame_size + crtl->outgoing_args_size == 0)
15177 {
15178 int reg = -1;
15179
15180 /* If it is safe to use r3, then do so. This sometimes
15181 generates better code on Thumb-2 by avoiding the need to
15182 use 32-bit push/pop instructions. */
15183 if (!crtl->tail_call_emit
15184 && arm_size_return_regs () <= 12
15185 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15186 {
15187 reg = 3;
15188 }
15189 else
15190 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15191 {
15192 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15193 {
15194 reg = i;
15195 break;
15196 }
15197 }
15198
15199 if (reg != -1)
15200 {
15201 offsets->saved_regs += 4;
15202 offsets->saved_regs_mask |= (1 << reg);
15203 }
15204 }
15205 }
15206
15207 offsets->locals_base = offsets->soft_frame + frame_size;
15208 offsets->outgoing_args = (offsets->locals_base
15209 + crtl->outgoing_args_size);
15210
15211 if (ARM_DOUBLEWORD_ALIGN)
15212 {
15213 /* Ensure SP remains doubleword aligned. */
15214 if (offsets->outgoing_args & 7)
15215 offsets->outgoing_args += 4;
15216 gcc_assert (!(offsets->outgoing_args & 7));
15217 }
15218
15219 return offsets;
15220 }
15221
15222
15223 /* Calculate the relative offsets for the different stack pointers. Positive
15224 offsets are in the direction of stack growth. */
15225
15226 HOST_WIDE_INT
15227 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15228 {
15229 arm_stack_offsets *offsets;
15230
15231 offsets = arm_get_frame_offsets ();
15232
15233 /* OK, now we have enough information to compute the distances.
15234 There must be an entry in these switch tables for each pair
15235 of registers in ELIMINABLE_REGS, even if some of the entries
15236 seem to be redundant or useless. */
15237 switch (from)
15238 {
15239 case ARG_POINTER_REGNUM:
15240 switch (to)
15241 {
15242 case THUMB_HARD_FRAME_POINTER_REGNUM:
15243 return 0;
15244
15245 case FRAME_POINTER_REGNUM:
15246 /* This is the reverse of the soft frame pointer
15247 to hard frame pointer elimination below. */
15248 return offsets->soft_frame - offsets->saved_args;
15249
15250 case ARM_HARD_FRAME_POINTER_REGNUM:
15251 /* This is only non-zero in the case where the static chain register
15252 is stored above the frame. */
15253 return offsets->frame - offsets->saved_args - 4;
15254
15255 case STACK_POINTER_REGNUM:
15256 /* If nothing has been pushed on the stack at all
15257 then this will return -4. This *is* correct! */
15258 return offsets->outgoing_args - (offsets->saved_args + 4);
15259
15260 default:
15261 gcc_unreachable ();
15262 }
15263 gcc_unreachable ();
15264
15265 case FRAME_POINTER_REGNUM:
15266 switch (to)
15267 {
15268 case THUMB_HARD_FRAME_POINTER_REGNUM:
15269 return 0;
15270
15271 case ARM_HARD_FRAME_POINTER_REGNUM:
15272 /* The hard frame pointer points to the top entry in the
15273 stack frame. The soft frame pointer to the bottom entry
15274 in the stack frame. If there is no stack frame at all,
15275 then they are identical. */
15276
15277 return offsets->frame - offsets->soft_frame;
15278
15279 case STACK_POINTER_REGNUM:
15280 return offsets->outgoing_args - offsets->soft_frame;
15281
15282 default:
15283 gcc_unreachable ();
15284 }
15285 gcc_unreachable ();
15286
15287 default:
15288 /* You cannot eliminate from the stack pointer.
15289 In theory you could eliminate from the hard frame
15290 pointer to the stack pointer, but this will never
15291 happen, since if a stack frame is not needed the
15292 hard frame pointer will never be used. */
15293 gcc_unreachable ();
15294 }
15295 }
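
/* Continuing the invented example used above (saved_args = 0, saved_regs = 16,
   soft_frame = 16, outgoing_args = 48): eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 48 - (0 + 4) = 44, and FRAME_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields 48 - 16 = 32.  Illustrative only.  */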
15296
15297 /* Given FROM and TO register numbers, say whether this elimination is
15298 allowed. Frame pointer elimination is automatically handled.
15299
15300 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15301 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15302 pointer, we must eliminate FRAME_POINTER_REGNUM into
15303 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15304 ARG_POINTER_REGNUM. */
15305
15306 bool
15307 arm_can_eliminate (const int from, const int to)
15308 {
15309 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15310 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15311 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15312 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15313 true);
15314 }
15315
15316 /* Emit RTL to save coprocessor registers on function entry. Returns the
15317 number of bytes pushed. */
15318
15319 static int
15320 arm_save_coproc_regs(void)
15321 {
15322 int saved_size = 0;
15323 unsigned reg;
15324 unsigned start_reg;
15325 rtx insn;
15326
15327 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15328 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15329 {
15330 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15331 insn = gen_rtx_MEM (V2SImode, insn);
15332 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15333 RTX_FRAME_RELATED_P (insn) = 1;
15334 saved_size += 8;
15335 }
15336
15337 /* Save any floating point call-saved registers used by this
15338 function. */
15339 if (TARGET_FPA_EMU2)
15340 {
15341 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15342 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15343 {
15344 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15345 insn = gen_rtx_MEM (XFmode, insn);
15346 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15347 RTX_FRAME_RELATED_P (insn) = 1;
15348 saved_size += 12;
15349 }
15350 }
15351 else
15352 {
15353 start_reg = LAST_FPA_REGNUM;
15354
15355 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15356 {
15357 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15358 {
15359 if (start_reg - reg == 3)
15360 {
15361 insn = emit_sfm (reg, 4);
15362 RTX_FRAME_RELATED_P (insn) = 1;
15363 saved_size += 48;
15364 start_reg = reg - 1;
15365 }
15366 }
15367 else
15368 {
15369 if (start_reg != reg)
15370 {
15371 insn = emit_sfm (reg + 1, start_reg - reg);
15372 RTX_FRAME_RELATED_P (insn) = 1;
15373 saved_size += (start_reg - reg) * 12;
15374 }
15375 start_reg = reg - 1;
15376 }
15377 }
15378
15379 if (start_reg != reg)
15380 {
15381 insn = emit_sfm (reg + 1, start_reg - reg);
15382 saved_size += (start_reg - reg) * 12;
15383 RTX_FRAME_RELATED_P (insn) = 1;
15384 }
15385 }
15386 if (TARGET_HARD_FLOAT && TARGET_VFP)
15387 {
15388 start_reg = FIRST_VFP_REGNUM;
15389
15390 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15391 {
15392 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15393 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15394 {
15395 if (start_reg != reg)
15396 saved_size += vfp_emit_fstmd (start_reg,
15397 (reg - start_reg) / 2);
15398 start_reg = reg + 2;
15399 }
15400 }
15401 if (start_reg != reg)
15402 saved_size += vfp_emit_fstmd (start_reg,
15403 (reg - start_reg) / 2);
15404 }
15405 return saved_size;
15406 }
15407
15408
15409 /* Set the Thumb frame pointer from the stack pointer. */
15410
15411 static void
15412 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15413 {
15414 HOST_WIDE_INT amount;
15415 rtx insn, dwarf;
15416
15417 amount = offsets->outgoing_args - offsets->locals_base;
15418 if (amount < 1024)
15419 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15420 stack_pointer_rtx, GEN_INT (amount)));
15421 else
15422 {
15423 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15424 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15425 expects the first two operands to be the same. */
15426 if (TARGET_THUMB2)
15427 {
15428 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15429 stack_pointer_rtx,
15430 hard_frame_pointer_rtx));
15431 }
15432 else
15433 {
15434 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15435 hard_frame_pointer_rtx,
15436 stack_pointer_rtx));
15437 }
15438 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15439 plus_constant (stack_pointer_rtx, amount));
15440 RTX_FRAME_RELATED_P (dwarf) = 1;
15441 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15442 }
15443
15444 RTX_FRAME_RELATED_P (insn) = 1;
15445 }
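
/* E.g. (sketch): with a 12-byte gap between the locals base and the
   outgoing-args area this emits a single "add r7, sp, #12" style instruction
   (r7 being the Thumb hard frame pointer); for amounts of 1024 or more the
   constant is first loaded into the frame pointer and then added to SP, with
   an explicit REG_FRAME_RELATED_EXPR note describing the net effect for the
   unwinder.  */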
15446
15447 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15448 function. */
15449 void
15450 arm_expand_prologue (void)
15451 {
15452 rtx amount;
15453 rtx insn;
15454 rtx ip_rtx;
15455 unsigned long live_regs_mask;
15456 unsigned long func_type;
15457 int fp_offset = 0;
15458 int saved_pretend_args = 0;
15459 int saved_regs = 0;
15460 unsigned HOST_WIDE_INT args_to_push;
15461 arm_stack_offsets *offsets;
15462
15463 func_type = arm_current_func_type ();
15464
15465 /* Naked functions don't have prologues. */
15466 if (IS_NAKED (func_type))
15467 return;
15468
15469 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15470 args_to_push = crtl->args.pretend_args_size;
15471
15472 /* Compute which registers we will have to save onto the stack. */
15473 offsets = arm_get_frame_offsets ();
15474 live_regs_mask = offsets->saved_regs_mask;
15475
15476 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15477
15478 if (IS_STACKALIGN (func_type))
15479 {
15480 rtx dwarf;
15481 rtx r0;
15482 rtx r1;
15483 /* Handle a word-aligned stack pointer. We generate the following:
15484
15485 mov r0, sp
15486 bic r1, r0, #7
15487 mov sp, r1
15488 <save and restore r0 in normal prologue/epilogue>
15489 mov sp, r0
15490 bx lr
15491
15492 The unwinder doesn't need to know about the stack realignment.
15493 Just tell it we saved SP in r0. */
15494 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15495
15496 r0 = gen_rtx_REG (SImode, 0);
15497 r1 = gen_rtx_REG (SImode, 1);
15498 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15499 compiler won't choke. */
15500 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15501 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15502 insn = gen_movsi (r0, stack_pointer_rtx);
15503 RTX_FRAME_RELATED_P (insn) = 1;
15504 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15505 emit_insn (insn);
15506 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15507 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15508 }
15509
15510 /* For APCS frames, if IP register is clobbered
15511 when creating frame, save that register in a special
15512 way. */
15513 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15514 {
15515 if (IS_INTERRUPT (func_type))
15516 {
15517 /* Interrupt functions must not corrupt any registers.
15518 Creating a frame pointer however, corrupts the IP
15519 register, so we must push it first. */
15520 insn = emit_multi_reg_push (1 << IP_REGNUM);
15521
15522 /* Do not set RTX_FRAME_RELATED_P on this insn.
15523 The dwarf stack unwinding code only wants to see one
15524 stack decrement per function, and this is not it. If
15525 this instruction is labeled as being part of the frame
15526 creation sequence then dwarf2out_frame_debug_expr will
15527 die when it encounters the assignment of IP to FP
15528 later on, since the use of SP here establishes SP as
15529 the CFA register and not IP.
15530
15531 Anyway this instruction is not really part of the stack
15532 frame creation although it is part of the prologue. */
15533 }
15534 else if (IS_NESTED (func_type))
15535 {
15536 /* The Static chain register is the same as the IP register
15537 used as a scratch register during stack frame creation.
15538 To get around this we need to find somewhere to store IP
15539 whilst the frame is being created. We try the following
15540 places in order:
15541
15542 1. The last argument register.
15543 2. A slot on the stack above the frame. (This only
15544 works if the function is not a varargs function).
15545 3. Register r3, after pushing the argument registers
15546 onto the stack.
15547
15548 Note - we only need to tell the dwarf2 backend about the SP
15549 adjustment in the second variant; the static chain register
15550 doesn't need to be unwound, as it doesn't contain a value
15551 inherited from the caller. */
15552
15553 if (df_regs_ever_live_p (3) == false)
15554 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15555 else if (args_to_push == 0)
15556 {
15557 rtx dwarf;
15558
15559 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15560 saved_regs += 4;
15561
15562 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15563 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15564 fp_offset = 4;
15565
15566 /* Just tell the dwarf backend that we adjusted SP. */
15567 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15568 plus_constant (stack_pointer_rtx,
15569 -fp_offset));
15570 RTX_FRAME_RELATED_P (insn) = 1;
15571 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15572 }
15573 else
15574 {
15575 /* Store the args on the stack. */
15576 if (cfun->machine->uses_anonymous_args)
15577 insn = emit_multi_reg_push
15578 ((0xf0 >> (args_to_push / 4)) & 0xf);
15579 else
15580 insn = emit_insn
15581 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15582 GEN_INT (- args_to_push)));
15583
15584 RTX_FRAME_RELATED_P (insn) = 1;
15585
15586 saved_pretend_args = 1;
15587 fp_offset = args_to_push;
15588 args_to_push = 0;
15589
15590 /* Now reuse r3 to preserve IP. */
15591 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15592 }
15593 }
15594
15595 insn = emit_set_insn (ip_rtx,
15596 plus_constant (stack_pointer_rtx, fp_offset));
15597 RTX_FRAME_RELATED_P (insn) = 1;
15598 }
15599
15600 if (args_to_push)
15601 {
15602 /* Push the argument registers, or reserve space for them. */
15603 if (cfun->machine->uses_anonymous_args)
15604 insn = emit_multi_reg_push
15605 ((0xf0 >> (args_to_push / 4)) & 0xf);
15606 else
15607 insn = emit_insn
15608 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15609 GEN_INT (- args_to_push)));
15610 RTX_FRAME_RELATED_P (insn) = 1;
15611 }
15612
15613 /* If this is an interrupt service routine, and the link register
15614 is going to be pushed, and we're not generating an extra
15615 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
15616 subtracting four from LR now will mean that the function return
15617 can be done with a single instruction. */
15618 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15619 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15620 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15621 && TARGET_ARM)
15622 {
15623 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15624
15625 emit_set_insn (lr, plus_constant (lr, -4));
15626 }
15627
15628 if (live_regs_mask)
15629 {
15630 saved_regs += bit_count (live_regs_mask) * 4;
15631 if (optimize_size && !frame_pointer_needed
15632 && saved_regs == offsets->saved_regs - offsets->saved_args)
15633 {
15634 /* If no coprocessor registers are being pushed and we don't have
15635 to worry about a frame pointer then push extra registers to
15636 create the stack frame. This is done in a way that does not
15637 alter the frame layout, so is independent of the epilogue. */
15638 int n;
15639 int frame;
15640 n = 0;
15641 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15642 n++;
15643 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15644 if (frame && n * 4 >= frame)
15645 {
15646 n = frame / 4;
15647 live_regs_mask |= (1 << n) - 1;
15648 saved_regs += frame;
15649 }
15650 }
15651 insn = emit_multi_reg_push (live_regs_mask);
15652 RTX_FRAME_RELATED_P (insn) = 1;
15653 }
15654
15655 if (! IS_VOLATILE (func_type))
15656 saved_regs += arm_save_coproc_regs ();
15657
15658 if (frame_pointer_needed && TARGET_ARM)
15659 {
15660 /* Create the new frame pointer. */
15661 if (TARGET_APCS_FRAME)
15662 {
15663 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15664 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15665 RTX_FRAME_RELATED_P (insn) = 1;
15666
15667 if (IS_NESTED (func_type))
15668 {
15669 /* Recover the static chain register. */
15670 if (!df_regs_ever_live_p (3)
15671 || saved_pretend_args)
15672 insn = gen_rtx_REG (SImode, 3);
15673 else /* if (crtl->args.pretend_args_size == 0) */
15674 {
15675 insn = plus_constant (hard_frame_pointer_rtx, 4);
15676 insn = gen_frame_mem (SImode, insn);
15677 }
15678 emit_set_insn (ip_rtx, insn);
15679 /* Add a USE to stop propagate_one_insn() from barfing. */
15680 emit_insn (gen_prologue_use (ip_rtx));
15681 }
15682 }
15683 else
15684 {
15685 insn = GEN_INT (saved_regs - 4);
15686 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15687 stack_pointer_rtx, insn));
15688 RTX_FRAME_RELATED_P (insn) = 1;
15689 }
15690 }
15691
15692 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15693 {
15694 /* This add can produce multiple insns for a large constant, so we
15695 need to get tricky. */
15696 rtx last = get_last_insn ();
15697
15698 amount = GEN_INT (offsets->saved_args + saved_regs
15699 - offsets->outgoing_args);
15700
15701 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15702 amount));
15703 do
15704 {
15705 last = last ? NEXT_INSN (last) : get_insns ();
15706 RTX_FRAME_RELATED_P (last) = 1;
15707 }
15708 while (last != insn);
15709
15710 /* If the frame pointer is needed, emit a special barrier that
15711 will prevent the scheduler from moving stores to the frame
15712 before the stack adjustment. */
15713 if (frame_pointer_needed)
15714 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15715 hard_frame_pointer_rtx));
15716 }
15717
15718
15719 if (frame_pointer_needed && TARGET_THUMB2)
15720 thumb_set_frame_pointer (offsets);
15721
15722 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15723 {
15724 unsigned long mask;
15725
15726 mask = live_regs_mask;
15727 mask &= THUMB2_WORK_REGS;
15728 if (!IS_NESTED (func_type))
15729 mask |= (1 << IP_REGNUM);
15730 arm_load_pic_register (mask);
15731 }
15732
15733 /* If we are profiling, make sure no instructions are scheduled before
15734 the call to mcount. Similarly if the user has requested no
15735 scheduling in the prolog. Similarly if we want non-call exceptions
15736 using the EABI unwinder, to prevent faulting instructions from being
15737 swapped with a stack adjustment. */
15738 if (crtl->profile || !TARGET_SCHED_PROLOG
15739 || (arm_except_unwind_info () == UI_TARGET
15740 && cfun->can_throw_non_call_exceptions))
15741 emit_insn (gen_blockage ());
15742
15743 /* If the link register is being kept alive, with the return address in it,
15744 then make sure that it does not get reused by the ce2 pass. */
15745 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15746 cfun->machine->lr_save_eliminated = 1;
15747 }
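
/* A rough sketch of what the above typically produces for a simple
   APCS-frame ARM function with no pretend arguments (illustrative, not
   captured from a real compile; the exact register list depends on the
   save-register mask computed elsewhere):

       mov     ip, sp
       stmfd   sp!, {r4, fp, ip, lr, pc}
       sub     fp, ip, #4
       sub     sp, sp, #<locals + outgoing args>

   The constants come from the frame layout computed by
   arm_get_frame_offsets.  */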
15748 \f
15749 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15750 static void
15751 arm_print_condition (FILE *stream)
15752 {
15753 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15754 {
15755 /* Branch conversion is not implemented for Thumb-2. */
15756 if (TARGET_THUMB)
15757 {
15758 output_operand_lossage ("predicated Thumb instruction");
15759 return;
15760 }
15761 if (current_insn_predicate != NULL)
15762 {
15763 output_operand_lossage
15764 ("predicated instruction in conditional sequence");
15765 return;
15766 }
15767
15768 fputs (arm_condition_codes[arm_current_cc], stream);
15769 }
15770 else if (current_insn_predicate)
15771 {
15772 enum arm_cond_code code;
15773
15774 if (TARGET_THUMB1)
15775 {
15776 output_operand_lossage ("predicated Thumb instruction");
15777 return;
15778 }
15779
15780 code = get_arm_condition_code (current_insn_predicate);
15781 fputs (arm_condition_codes[code], stream);
15782 }
15783 }
15784
15785
15786 /* If CODE is 'd', then the X is a condition operand and the instruction
15787 should only be executed if the condition is true.
15788 If CODE is 'D', then the X is a condition operand and the instruction
15789 should only be executed if the condition is false: however, if the mode
15790 of the comparison is CCFPEmode, then always execute the instruction -- we
15791 do this because in these circumstances !GE does not necessarily imply LT;
15792 in these cases the instruction pattern will take care to make sure that
15793 an instruction containing %d will follow, thereby undoing the effects of
15794 doing this instruction unconditionally.
15795 If CODE is 'N' then X is a floating point operand that must be negated
15796 before output.
15797 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15798 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15799 static void
15800 arm_print_operand (FILE *stream, rtx x, int code)
15801 {
15802 switch (code)
15803 {
15804 case '@':
15805 fputs (ASM_COMMENT_START, stream);
15806 return;
15807
15808 case '_':
15809 fputs (user_label_prefix, stream);
15810 return;
15811
15812 case '|':
15813 fputs (REGISTER_PREFIX, stream);
15814 return;
15815
15816 case '?':
15817 arm_print_condition (stream);
15818 return;
15819
15820 case '(':
15821 /* Nothing in unified syntax, otherwise the current condition code. */
15822 if (!TARGET_UNIFIED_ASM)
15823 arm_print_condition (stream);
15824 break;
15825
15826 case ')':
15827 /* The current condition code in unified syntax, otherwise nothing. */
15828 if (TARGET_UNIFIED_ASM)
15829 arm_print_condition (stream);
15830 break;
15831
15832 case '.':
15833 /* The current condition code for a condition code setting instruction.
15834 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15835 if (TARGET_UNIFIED_ASM)
15836 {
15837 fputc('s', stream);
15838 arm_print_condition (stream);
15839 }
15840 else
15841 {
15842 arm_print_condition (stream);
15843 fputc('s', stream);
15844 }
15845 return;
15846
15847 case '!':
15848 /* If the instruction is conditionally executed then print
15849 the current condition code, otherwise print 's'. */
15850 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15851 if (current_insn_predicate)
15852 arm_print_condition (stream);
15853 else
15854 fputc('s', stream);
15855 break;
15856
15857 /* %# is a "break" sequence. It doesn't output anything, but is used to
15858 separate e.g. operand numbers from following text, if that text consists
15859 of further digits which we don't want to be part of the operand
15860 number. */
15861 case '#':
15862 return;
15863
15864 case 'N':
15865 {
15866 REAL_VALUE_TYPE r;
15867 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15868 r = real_value_negate (&r);
15869 fprintf (stream, "%s", fp_const_from_val (&r));
15870 }
15871 return;
15872
15873 /* An integer or symbol address without a preceding # sign. */
15874 case 'c':
15875 switch (GET_CODE (x))
15876 {
15877 case CONST_INT:
15878 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15879 break;
15880
15881 case SYMBOL_REF:
15882 output_addr_const (stream, x);
15883 break;
15884
15885 default:
15886 gcc_unreachable ();
15887 }
15888 return;
15889
15890 case 'B':
15891 if (GET_CODE (x) == CONST_INT)
15892 {
15893 HOST_WIDE_INT val;
15894 val = ARM_SIGN_EXTEND (~INTVAL (x));
15895 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15896 }
15897 else
15898 {
15899 putc ('~', stream);
15900 output_addr_const (stream, x);
15901 }
15902 return;
15903
15904 case 'L':
15905 /* The low 16 bits of an immediate constant. */
15906 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15907 return;
15908
15909 case 'i':
15910 fprintf (stream, "%s", arithmetic_instr (x, 1));
15911 return;
15912
15913 /* Truncate Cirrus shift counts. */
15914 case 's':
15915 if (GET_CODE (x) == CONST_INT)
15916 {
15917 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15918 return;
15919 }
15920 arm_print_operand (stream, x, 0);
15921 return;
15922
15923 case 'I':
15924 fprintf (stream, "%s", arithmetic_instr (x, 0));
15925 return;
15926
15927 case 'S':
15928 {
15929 HOST_WIDE_INT val;
15930 const char *shift;
15931
15932 if (!shift_operator (x, SImode))
15933 {
15934 output_operand_lossage ("invalid shift operand");
15935 break;
15936 }
15937
15938 shift = shift_op (x, &val);
15939
15940 if (shift)
15941 {
15942 fprintf (stream, ", %s ", shift);
15943 if (val == -1)
15944 arm_print_operand (stream, XEXP (x, 1), 0);
15945 else
15946 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15947 }
15948 }
15949 return;
15950
15951 /* An explanation of the 'Q', 'R' and 'H' register operands:
15952
15953 In a pair of registers containing a DI or DF value the 'Q'
15954 operand returns the register number of the register containing
15955 the least significant part of the value. The 'R' operand returns
15956 the register number of the register containing the most
15957 significant part of the value.
15958
15959 The 'H' operand returns the higher of the two register numbers.
15960 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15961 same as the 'Q' operand, since the most significant part of the
15962 value is held in the lower number register. The reverse is true
15963 on systems where WORDS_BIG_ENDIAN is false.
15964
15965 The purpose of these operands is to distinguish between cases
15966 where the endian-ness of the values is important (for example
15967 when they are added together), and cases where the endian-ness
15968 is irrelevant, but the order of register operations is important.
15969 For example when loading a value from memory into a register
15970 pair, the endian-ness does not matter. Provided that the value
15971 from the lower memory address is put into the lower numbered
15972 register, and the value from the higher address is put into the
15973 higher numbered register, the load will work regardless of whether
15974 the value being loaded is big-wordian or little-wordian. The
15975 order of the two register loads can matter however, if the address
15976 of the memory location is actually held in one of the registers
15977 being overwritten by the load.
15978
15979 The 'Q' and 'R' constraints are also available for 64-bit
15980 constants. */
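
/* For instance (sketch): with a little-endian DImode value held in {r0, r1},
   %Q prints r0 (the least significant word), %R prints r1 (the most
   significant word) and %H also prints r1; when WORDS_BIG_ENDIAN is true the
   roles of %Q and %R swap while %H still prints the higher register
   number.  */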
15981 case 'Q':
15982 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15983 {
15984 rtx part = gen_lowpart (SImode, x);
15985 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15986 return;
15987 }
15988
15989 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15990 {
15991 output_operand_lossage ("invalid operand for code '%c'", code);
15992 return;
15993 }
15994
15995 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15996 return;
15997
15998 case 'R':
15999 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16000 {
16001 enum machine_mode mode = GET_MODE (x);
16002 rtx part;
16003
16004 if (mode == VOIDmode)
16005 mode = DImode;
16006 part = gen_highpart_mode (SImode, mode, x);
16007 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16008 return;
16009 }
16010
16011 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16012 {
16013 output_operand_lossage ("invalid operand for code '%c'", code);
16014 return;
16015 }
16016
16017 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16018 return;
16019
16020 case 'H':
16021 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16022 {
16023 output_operand_lossage ("invalid operand for code '%c'", code);
16024 return;
16025 }
16026
16027 asm_fprintf (stream, "%r", REGNO (x) + 1);
16028 return;
16029
16030 case 'J':
16031 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16032 {
16033 output_operand_lossage ("invalid operand for code '%c'", code);
16034 return;
16035 }
16036
16037 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16038 return;
16039
16040 case 'K':
16041 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16042 {
16043 output_operand_lossage ("invalid operand for code '%c'", code);
16044 return;
16045 }
16046
16047 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16048 return;
16049
16050 case 'm':
16051 asm_fprintf (stream, "%r",
16052 GET_CODE (XEXP (x, 0)) == REG
16053 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16054 return;
16055
16056 case 'M':
16057 asm_fprintf (stream, "{%r-%r}",
16058 REGNO (x),
16059 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16060 return;
16061
16062 /* Like 'M', but writing doubleword vector registers, for use by Neon
16063 insns. */
16064 case 'h':
16065 {
16066 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16067 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16068 if (numregs == 1)
16069 asm_fprintf (stream, "{d%d}", regno);
16070 else
16071 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16072 }
16073 return;
16074
16075 case 'd':
16076 /* CONST_TRUE_RTX means always -- that's the default. */
16077 if (x == const_true_rtx)
16078 return;
16079
16080 if (!COMPARISON_P (x))
16081 {
16082 output_operand_lossage ("invalid operand for code '%c'", code);
16083 return;
16084 }
16085
16086 fputs (arm_condition_codes[get_arm_condition_code (x)],
16087 stream);
16088 return;
16089
16090 case 'D':
16091 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16092 want to do that. */
16093 if (x == const_true_rtx)
16094 {
16095 output_operand_lossage ("instruction never executed");
16096 return;
16097 }
16098 if (!COMPARISON_P (x))
16099 {
16100 output_operand_lossage ("invalid operand for code '%c'", code);
16101 return;
16102 }
16103
16104 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16105 (get_arm_condition_code (x))],
16106 stream);
16107 return;
16108
16109 /* Cirrus registers can be accessed in a variety of ways:
16110 single floating point (f)
16111 double floating point (d)
16112 32bit integer (fx)
16113 64bit integer (dx). */
16114 case 'W': /* Cirrus register in F mode. */
16115 case 'X': /* Cirrus register in D mode. */
16116 case 'Y': /* Cirrus register in FX mode. */
16117 case 'Z': /* Cirrus register in DX mode. */
16118 gcc_assert (GET_CODE (x) == REG
16119 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16120
16121 fprintf (stream, "mv%s%s",
16122 code == 'W' ? "f"
16123 : code == 'X' ? "d"
16124 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16125
16126 return;
16127
16128 /* Print cirrus register in the mode specified by the register's mode. */
16129 case 'V':
16130 {
16131 int mode = GET_MODE (x);
16132
16133 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16134 {
16135 output_operand_lossage ("invalid operand for code '%c'", code);
16136 return;
16137 }
16138
16139 fprintf (stream, "mv%s%s",
16140 mode == DFmode ? "d"
16141 : mode == SImode ? "fx"
16142 : mode == DImode ? "dx"
16143 : "f", reg_names[REGNO (x)] + 2);
16144
16145 return;
16146 }
16147
16148 case 'U':
16149 if (GET_CODE (x) != REG
16150 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16151 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16152 /* Bad value for wCG register number. */
16153 {
16154 output_operand_lossage ("invalid operand for code '%c'", code);
16155 return;
16156 }
16157
16158 else
16159 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16160 return;
16161
16162 /* Print an iWMMXt control register name. */
16163 case 'w':
16164 if (GET_CODE (x) != CONST_INT
16165 || INTVAL (x) < 0
16166 || INTVAL (x) >= 16)
16167 /* Bad value for wC register number. */
16168 {
16169 output_operand_lossage ("invalid operand for code '%c'", code);
16170 return;
16171 }
16172
16173 else
16174 {
16175 static const char * wc_reg_names [16] =
16176 {
16177 "wCID", "wCon", "wCSSF", "wCASF",
16178 "wC4", "wC5", "wC6", "wC7",
16179 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16180 "wC12", "wC13", "wC14", "wC15"
16181 };
16182
16183 fputs (wc_reg_names [INTVAL (x)], stream);
16184 }
16185 return;
16186
16187 /* Print the high single-precision register of a VFP double-precision
16188 register. */
16189 case 'p':
16190 {
16191 int mode = GET_MODE (x);
16192 int regno;
16193
16194 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16195 {
16196 output_operand_lossage ("invalid operand for code '%c'", code);
16197 return;
16198 }
16199
16200 regno = REGNO (x);
16201 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16202 {
16203 output_operand_lossage ("invalid operand for code '%c'", code);
16204 return;
16205 }
16206
16207 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16208 }
16209 return;
16210
16211 /* Print a VFP/Neon double precision or quad precision register name. */
16212 case 'P':
16213 case 'q':
16214 {
16215 int mode = GET_MODE (x);
16216 int is_quad = (code == 'q');
16217 int regno;
16218
16219 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16220 {
16221 output_operand_lossage ("invalid operand for code '%c'", code);
16222 return;
16223 }
16224
16225 if (GET_CODE (x) != REG
16226 || !IS_VFP_REGNUM (REGNO (x)))
16227 {
16228 output_operand_lossage ("invalid operand for code '%c'", code);
16229 return;
16230 }
16231
16232 regno = REGNO (x);
16233 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16234 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16235 {
16236 output_operand_lossage ("invalid operand for code '%c'", code);
16237 return;
16238 }
16239
16240 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16241 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16242 }
16243 return;
16244
16245 /* These two codes print the low/high doubleword register of a Neon quad
16246 register, respectively. For pair-structure types, can also print
16247 low/high quadword registers. */
16248 case 'e':
16249 case 'f':
16250 {
16251 int mode = GET_MODE (x);
16252 int regno;
16253
16254 if ((GET_MODE_SIZE (mode) != 16
16255 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16256 {
16257 output_operand_lossage ("invalid operand for code '%c'", code);
16258 return;
16259 }
16260
16261 regno = REGNO (x);
16262 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16263 {
16264 output_operand_lossage ("invalid operand for code '%c'", code);
16265 return;
16266 }
16267
16268 if (GET_MODE_SIZE (mode) == 16)
16269 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16270 + (code == 'f' ? 1 : 0));
16271 else
16272 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16273 + (code == 'f' ? 1 : 0));
16274 }
16275 return;
16276
16277 /* Print a VFPv3 floating-point constant, represented as an integer
16278 index. */
16279 case 'G':
16280 {
16281 int index = vfp3_const_double_index (x);
16282 gcc_assert (index != -1);
16283 fprintf (stream, "%d", index);
16284 }
16285 return;
16286
16287 /* Print bits representing opcode features for Neon.
16288
16289 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16290 and polynomials as unsigned.
16291
16292 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16293
16294 Bit 2 is 1 for rounding functions, 0 otherwise. */
16295
16296 /* Identify the type as 's', 'u', 'p' or 'f'. */
16297 case 'T':
16298 {
16299 HOST_WIDE_INT bits = INTVAL (x);
16300 fputc ("uspf"[bits & 3], stream);
16301 }
16302 return;
16303
16304 /* Likewise, but signed and unsigned integers are both 'i'. */
16305 case 'F':
16306 {
16307 HOST_WIDE_INT bits = INTVAL (x);
16308 fputc ("iipf"[bits & 3], stream);
16309 }
16310 return;
16311
16312 /* As for 'T', but emit 'u' instead of 'p'. */
16313 case 't':
16314 {
16315 HOST_WIDE_INT bits = INTVAL (x);
16316 fputc ("usuf"[bits & 3], stream);
16317 }
16318 return;
16319
16320 /* Bit 2: rounding (vs none). */
16321 case 'O':
16322 {
16323 HOST_WIDE_INT bits = INTVAL (x);
16324 fputs ((bits & 4) != 0 ? "r" : "", stream);
16325 }
16326 return;
16327
16328 /* Memory operand for vld1/vst1 instruction. */
16329 case 'A':
16330 {
16331 rtx addr;
16332 bool postinc = FALSE;
16333 unsigned align, modesize, align_bits;
16334
16335 gcc_assert (GET_CODE (x) == MEM);
16336 addr = XEXP (x, 0);
16337 if (GET_CODE (addr) == POST_INC)
16338 {
16339 postinc = 1;
16340 addr = XEXP (addr, 0);
16341 }
16342 asm_fprintf (stream, "[%r", REGNO (addr));
16343
16344 /* We know the alignment of this access, so we can emit a hint in the
16345 instruction (for some alignments) as an aid to the memory subsystem
16346 of the target. */
16347 align = MEM_ALIGN (x) >> 3;
16348 modesize = GET_MODE_SIZE (GET_MODE (x));
16349
16350 /* Only certain alignment specifiers are supported by the hardware. */
16351 if (modesize == 16 && (align % 32) == 0)
16352 align_bits = 256;
16353 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16354 align_bits = 128;
16355 else if ((align % 8) == 0)
16356 align_bits = 64;
16357 else
16358 align_bits = 0;
16359
16360 if (align_bits != 0)
16361 asm_fprintf (stream, ":%d", align_bits);
16362
16363 asm_fprintf (stream, "]");
16364
16365 if (postinc)
16366 fputs("!", stream);
16367 }
16368 return;
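
/* Example of the 'A' output (illustrative): a 16-byte vector access whose MEM
   is known to be 128-bit (but not 256-bit) aligned and uses a post-increment
   of r0 is printed as "[r0:128]!"; with no usable alignment guarantee it
   degrades to plain "[r0]".  */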
16369
16370 case 'C':
16371 {
16372 rtx addr;
16373
16374 gcc_assert (GET_CODE (x) == MEM);
16375 addr = XEXP (x, 0);
16376 gcc_assert (GET_CODE (addr) == REG);
16377 asm_fprintf (stream, "[%r]", REGNO (addr));
16378 }
16379 return;
16380
16381 /* Translate an S register number into a D register number and element index. */
16382 case 'y':
16383 {
16384 int mode = GET_MODE (x);
16385 int regno;
16386
16387 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16388 {
16389 output_operand_lossage ("invalid operand for code '%c'", code);
16390 return;
16391 }
16392
16393 regno = REGNO (x);
16394 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16395 {
16396 output_operand_lossage ("invalid operand for code '%c'", code);
16397 return;
16398 }
16399
16400 regno = regno - FIRST_VFP_REGNUM;
16401 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16402 }
16403 return;
16404
16405 /* Register specifier for vld1.16/vst1.16. Translate the S register
16406 number into a D register number and element index. */
16407 case 'z':
16408 {
16409 int mode = GET_MODE (x);
16410 int regno;
16411
16412 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16413 {
16414 output_operand_lossage ("invalid operand for code '%c'", code);
16415 return;
16416 }
16417
16418 regno = REGNO (x);
16419 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16420 {
16421 output_operand_lossage ("invalid operand for code '%c'", code);
16422 return;
16423 }
16424
16425 regno = regno - FIRST_VFP_REGNUM;
16426 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16427 }
16428 return;
16429
16430 default:
16431 if (x == 0)
16432 {
16433 output_operand_lossage ("missing operand");
16434 return;
16435 }
16436
16437 switch (GET_CODE (x))
16438 {
16439 case REG:
16440 asm_fprintf (stream, "%r", REGNO (x));
16441 break;
16442
16443 case MEM:
16444 output_memory_reference_mode = GET_MODE (x);
16445 output_address (XEXP (x, 0));
16446 break;
16447
16448 case CONST_DOUBLE:
16449 if (TARGET_NEON)
16450 {
16451 char fpstr[20];
16452 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16453 sizeof (fpstr), 0, 1);
16454 fprintf (stream, "#%s", fpstr);
16455 }
16456 else
16457 fprintf (stream, "#%s", fp_immediate_constant (x));
16458 break;
16459
16460 default:
16461 gcc_assert (GET_CODE (x) != NEG);
16462 fputc ('#', stream);
16463 if (GET_CODE (x) == HIGH)
16464 {
16465 fputs (":lower16:", stream);
16466 x = XEXP (x, 0);
16467 }
16468
16469 output_addr_const (stream, x);
16470 break;
16471 }
16472 }
16473 }
16474 \f
16475 /* Target hook for printing a memory address. */
16476 static void
16477 arm_print_operand_address (FILE *stream, rtx x)
16478 {
16479 if (TARGET_32BIT)
16480 {
16481 int is_minus = GET_CODE (x) == MINUS;
16482
16483 if (GET_CODE (x) == REG)
16484 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16485 else if (GET_CODE (x) == PLUS || is_minus)
16486 {
16487 rtx base = XEXP (x, 0);
16488 rtx index = XEXP (x, 1);
16489 HOST_WIDE_INT offset = 0;
16490 if (GET_CODE (base) != REG
16491 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16492 {
16493 /* Ensure that BASE is a register. */
16494 /* (one of them must be). */
16495 /* Also ensure the SP is not used as an index register. */
16496 rtx temp = base;
16497 base = index;
16498 index = temp;
16499 }
16500 switch (GET_CODE (index))
16501 {
16502 case CONST_INT:
16503 offset = INTVAL (index);
16504 if (is_minus)
16505 offset = -offset;
16506 asm_fprintf (stream, "[%r, #%wd]",
16507 REGNO (base), offset);
16508 break;
16509
16510 case REG:
16511 asm_fprintf (stream, "[%r, %s%r]",
16512 REGNO (base), is_minus ? "-" : "",
16513 REGNO (index));
16514 break;
16515
16516 case MULT:
16517 case ASHIFTRT:
16518 case LSHIFTRT:
16519 case ASHIFT:
16520 case ROTATERT:
16521 {
16522 asm_fprintf (stream, "[%r, %s%r",
16523 REGNO (base), is_minus ? "-" : "",
16524 REGNO (XEXP (index, 0)));
16525 arm_print_operand (stream, index, 'S');
16526 fputs ("]", stream);
16527 break;
16528 }
16529
16530 default:
16531 gcc_unreachable ();
16532 }
16533 }
16534 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16535 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16536 {
16537 extern enum machine_mode output_memory_reference_mode;
16538
16539 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16540
16541 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16542 asm_fprintf (stream, "[%r, #%s%d]!",
16543 REGNO (XEXP (x, 0)),
16544 GET_CODE (x) == PRE_DEC ? "-" : "",
16545 GET_MODE_SIZE (output_memory_reference_mode));
16546 else
16547 asm_fprintf (stream, "[%r], #%s%d",
16548 REGNO (XEXP (x, 0)),
16549 GET_CODE (x) == POST_DEC ? "-" : "",
16550 GET_MODE_SIZE (output_memory_reference_mode));
16551 }
16552 else if (GET_CODE (x) == PRE_MODIFY)
16553 {
16554 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16555 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16556 asm_fprintf (stream, "#%wd]!",
16557 INTVAL (XEXP (XEXP (x, 1), 1)));
16558 else
16559 asm_fprintf (stream, "%r]!",
16560 REGNO (XEXP (XEXP (x, 1), 1)));
16561 }
16562 else if (GET_CODE (x) == POST_MODIFY)
16563 {
16564 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16565 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16566 asm_fprintf (stream, "#%wd",
16567 INTVAL (XEXP (XEXP (x, 1), 1)));
16568 else
16569 asm_fprintf (stream, "%r",
16570 REGNO (XEXP (XEXP (x, 1), 1)));
16571 }
16572 else output_addr_const (stream, x);
16573 }
16574 else
16575 {
16576 if (GET_CODE (x) == REG)
16577 asm_fprintf (stream, "[%r]", REGNO (x));
16578 else if (GET_CODE (x) == POST_INC)
16579 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16580 else if (GET_CODE (x) == PLUS)
16581 {
16582 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16583 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16584 asm_fprintf (stream, "[%r, #%wd]",
16585 REGNO (XEXP (x, 0)),
16586 INTVAL (XEXP (x, 1)));
16587 else
16588 asm_fprintf (stream, "[%r, %r]",
16589 REGNO (XEXP (x, 0)),
16590 REGNO (XEXP (x, 1)));
16591 }
16592 else
16593 output_addr_const (stream, x);
16594 }
16595 }
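
/* A few illustrative renderings of the above (32-bit case): a plain REG
   prints as "[r0, #0]", (plus (reg r0) (const_int 4)) as "[r0, #4]",
   (pre_dec (reg r0)) for an SImode access as "[r0, #-4]!" and
   (post_inc (reg r0)) as "[r0], #4".  */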
16596 \f
16597 /* Target hook for indicating whether a punctuation character for
16598 TARGET_PRINT_OPERAND is valid. */
16599 static bool
16600 arm_print_operand_punct_valid_p (unsigned char code)
16601 {
16602 return (code == '@' || code == '|' || code == '.'
16603 || code == '(' || code == ')' || code == '#'
16604 || (TARGET_32BIT && (code == '?'))
16605 || (TARGET_THUMB2 && (code == '!'))
16606 || (TARGET_THUMB && (code == '_')));
16607 }
16608 \f
16609 /* Target hook for assembling integer objects. The ARM version needs to
16610 handle word-sized values specially. */
16611 static bool
16612 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16613 {
16614 enum machine_mode mode;
16615
16616 if (size == UNITS_PER_WORD && aligned_p)
16617 {
16618 fputs ("\t.word\t", asm_out_file);
16619 output_addr_const (asm_out_file, x);
16620
16621 /* Mark symbols as position independent. We only do this in the
16622 .text segment, not in the .data segment. */
16623 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16624 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16625 {
16626 /* See legitimize_pic_address for an explanation of the
16627 TARGET_VXWORKS_RTP check. */
16628 if (TARGET_VXWORKS_RTP
16629 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16630 fputs ("(GOT)", asm_out_file);
16631 else
16632 fputs ("(GOTOFF)", asm_out_file);
16633 }
16634 fputc ('\n', asm_out_file);
16635 return true;
16636 }
16637
16638 mode = GET_MODE (x);
16639
16640 if (arm_vector_mode_supported_p (mode))
16641 {
16642 int i, units;
16643
16644 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16645
16646 units = CONST_VECTOR_NUNITS (x);
16647 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16648
16649 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16650 for (i = 0; i < units; i++)
16651 {
16652 rtx elt = CONST_VECTOR_ELT (x, i);
16653 assemble_integer
16654 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16655 }
16656 else
16657 for (i = 0; i < units; i++)
16658 {
16659 rtx elt = CONST_VECTOR_ELT (x, i);
16660 REAL_VALUE_TYPE rval;
16661
16662 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16663
16664 assemble_real
16665 (rval, GET_MODE_INNER (mode),
16666 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16667 }
16668
16669 return true;
16670 }
16671
16672 return default_assemble_integer (x, size, aligned_p);
16673 }
16674
16675 static void
16676 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16677 {
16678 section *s;
16679
16680 if (!TARGET_AAPCS_BASED)
16681 {
16682 (is_ctor ?
16683 default_named_section_asm_out_constructor
16684 : default_named_section_asm_out_destructor) (symbol, priority);
16685 return;
16686 }
16687
16688 /* Put these in the .init_array section, using a special relocation. */
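 /* For instance (illustrative), a constructor with priority 65 is placed
 in a section named ".init_array.00065" and emitted below as
 "\t.word\t<symbol>(target1)".  */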
16689 if (priority != DEFAULT_INIT_PRIORITY)
16690 {
16691 char buf[18];
16692 sprintf (buf, "%s.%.5u",
16693 is_ctor ? ".init_array" : ".fini_array",
16694 priority);
16695 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16696 }
16697 else if (is_ctor)
16698 s = ctors_section;
16699 else
16700 s = dtors_section;
16701
16702 switch_to_section (s);
16703 assemble_align (POINTER_SIZE);
16704 fputs ("\t.word\t", asm_out_file);
16705 output_addr_const (asm_out_file, symbol);
16706 fputs ("(target1)\n", asm_out_file);
16707 }
16708
16709 /* Add a function to the list of static constructors. */
16710
16711 static void
16712 arm_elf_asm_constructor (rtx symbol, int priority)
16713 {
16714 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16715 }
16716
16717 /* Add a function to the list of static destructors. */
16718
16719 static void
16720 arm_elf_asm_destructor (rtx symbol, int priority)
16721 {
16722 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16723 }
16724 \f
16725 /* A finite state machine takes care of noticing whether or not instructions
16726 can be conditionally executed, and thus decrease execution time and code
16727 size by deleting branch instructions. The fsm is controlled by
16728 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16729
 16730 /* The states of the fsm controlling condition codes are:
16731 0: normal, do nothing special
16732 1: make ASM_OUTPUT_OPCODE not output this instruction
16733 2: make ASM_OUTPUT_OPCODE not output this instruction
16734 3: make instructions conditional
16735 4: make instructions conditional
16736
16737 State transitions (state->state by whom under condition):
16738 0 -> 1 final_prescan_insn if the `target' is a label
16739 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16740 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16741 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16742 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16743 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16744 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16745 (the target insn is arm_target_insn).
16746
16747 If the jump clobbers the conditions then we use states 2 and 4.
16748
16749 A similar thing can be done with conditional return insns.
16750
16751 XXX In case the `target' is an unconditional branch, this conditionalising
16752 of the instructions always reduces code size, but not always execution
16753 time. But then, I want to reduce the code size to somewhere near what
16754 /bin/cc produces. */
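 /* An illustrative example (not taken verbatim from compiler output) of
 the transformation this fsm drives: a short forward branch such as

	cmp	r0, #0
	beq	.L1
	mov	r1, #1
 .L1:

 can instead be emitted as

	cmp	r0, #0
	movne	r1, #1

 i.e. the branch is deleted and the skipped instruction is predicated
 on the inverse condition.  */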
16755
16756 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16757 instructions. When a COND_EXEC instruction is seen the subsequent
16758 instructions are scanned so that multiple conditional instructions can be
16759 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16760 specify the length and true/false mask for the IT block. These will be
16761 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
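 /* For example (illustrative): a block of three conditional instructions
 (ce_count 1 each) where the first two use arm_current_cc and the third
 uses the inverse condition ends up with arm_condexec_masklen == 3 and
 arm_condexec_mask == 0x3, and thumb2_asm_output_opcode prints the block
 prefixed by "itte <cond>".  */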
16762
16763 /* Returns the index of the ARM condition code string in
16764 `arm_condition_codes'. COMPARISON should be an rtx like
16765 `(eq (...) (...))'. */
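 /* For instance (illustrative), (eq (reg:CC CC_REGNUM) (const_int 0))
 yields ARM_EQ; in a dominance mode such as CC_DNEmode, an NE comparison
 returns the encoded condition (ARM_NE here) and an EQ comparison
 returns its inverse.  */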
16766 static enum arm_cond_code
16767 get_arm_condition_code (rtx comparison)
16768 {
16769 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16770 enum arm_cond_code code;
16771 enum rtx_code comp_code = GET_CODE (comparison);
16772
16773 if (GET_MODE_CLASS (mode) != MODE_CC)
16774 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16775 XEXP (comparison, 1));
16776
16777 switch (mode)
16778 {
16779 case CC_DNEmode: code = ARM_NE; goto dominance;
16780 case CC_DEQmode: code = ARM_EQ; goto dominance;
16781 case CC_DGEmode: code = ARM_GE; goto dominance;
16782 case CC_DGTmode: code = ARM_GT; goto dominance;
16783 case CC_DLEmode: code = ARM_LE; goto dominance;
16784 case CC_DLTmode: code = ARM_LT; goto dominance;
16785 case CC_DGEUmode: code = ARM_CS; goto dominance;
16786 case CC_DGTUmode: code = ARM_HI; goto dominance;
16787 case CC_DLEUmode: code = ARM_LS; goto dominance;
16788 case CC_DLTUmode: code = ARM_CC;
16789
16790 dominance:
16791 gcc_assert (comp_code == EQ || comp_code == NE);
16792
16793 if (comp_code == EQ)
16794 return ARM_INVERSE_CONDITION_CODE (code);
16795 return code;
16796
16797 case CC_NOOVmode:
16798 switch (comp_code)
16799 {
16800 case NE: return ARM_NE;
16801 case EQ: return ARM_EQ;
16802 case GE: return ARM_PL;
16803 case LT: return ARM_MI;
16804 default: gcc_unreachable ();
16805 }
16806
16807 case CC_Zmode:
16808 switch (comp_code)
16809 {
16810 case NE: return ARM_NE;
16811 case EQ: return ARM_EQ;
16812 default: gcc_unreachable ();
16813 }
16814
16815 case CC_Nmode:
16816 switch (comp_code)
16817 {
16818 case NE: return ARM_MI;
16819 case EQ: return ARM_PL;
16820 default: gcc_unreachable ();
16821 }
16822
16823 case CCFPEmode:
16824 case CCFPmode:
16825 /* These encodings assume that AC=1 in the FPA system control
16826 byte. This allows us to handle all cases except UNEQ and
16827 LTGT. */
16828 switch (comp_code)
16829 {
16830 case GE: return ARM_GE;
16831 case GT: return ARM_GT;
16832 case LE: return ARM_LS;
16833 case LT: return ARM_MI;
16834 case NE: return ARM_NE;
16835 case EQ: return ARM_EQ;
16836 case ORDERED: return ARM_VC;
16837 case UNORDERED: return ARM_VS;
16838 case UNLT: return ARM_LT;
16839 case UNLE: return ARM_LE;
16840 case UNGT: return ARM_HI;
16841 case UNGE: return ARM_PL;
16842 /* UNEQ and LTGT do not have a representation. */
16843 case UNEQ: /* Fall through. */
16844 case LTGT: /* Fall through. */
16845 default: gcc_unreachable ();
16846 }
16847
16848 case CC_SWPmode:
16849 switch (comp_code)
16850 {
16851 case NE: return ARM_NE;
16852 case EQ: return ARM_EQ;
16853 case GE: return ARM_LE;
16854 case GT: return ARM_LT;
16855 case LE: return ARM_GE;
16856 case LT: return ARM_GT;
16857 case GEU: return ARM_LS;
16858 case GTU: return ARM_CC;
16859 case LEU: return ARM_CS;
16860 case LTU: return ARM_HI;
16861 default: gcc_unreachable ();
16862 }
16863
16864 case CC_Cmode:
16865 switch (comp_code)
16866 {
16867 case LTU: return ARM_CS;
16868 case GEU: return ARM_CC;
16869 default: gcc_unreachable ();
16870 }
16871
16872 case CC_CZmode:
16873 switch (comp_code)
16874 {
16875 case NE: return ARM_NE;
16876 case EQ: return ARM_EQ;
16877 case GEU: return ARM_CS;
16878 case GTU: return ARM_HI;
16879 case LEU: return ARM_LS;
16880 case LTU: return ARM_CC;
16881 default: gcc_unreachable ();
16882 }
16883
16884 case CC_NCVmode:
16885 switch (comp_code)
16886 {
16887 case GE: return ARM_GE;
16888 case LT: return ARM_LT;
16889 case GEU: return ARM_CS;
16890 case LTU: return ARM_CC;
16891 default: gcc_unreachable ();
16892 }
16893
16894 case CCmode:
16895 switch (comp_code)
16896 {
16897 case NE: return ARM_NE;
16898 case EQ: return ARM_EQ;
16899 case GE: return ARM_GE;
16900 case GT: return ARM_GT;
16901 case LE: return ARM_LE;
16902 case LT: return ARM_LT;
16903 case GEU: return ARM_CS;
16904 case GTU: return ARM_HI;
16905 case LEU: return ARM_LS;
16906 case LTU: return ARM_CC;
16907 default: gcc_unreachable ();
16908 }
16909
16910 default: gcc_unreachable ();
16911 }
16912 }
16913
16914 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
16915 instructions. */
16916 void
16917 thumb2_final_prescan_insn (rtx insn)
16918 {
16919 rtx first_insn = insn;
16920 rtx body = PATTERN (insn);
16921 rtx predicate;
16922 enum arm_cond_code code;
16923 int n;
16924 int mask;
16925
16926 /* Remove the previous insn from the count of insns to be output. */
16927 if (arm_condexec_count)
16928 arm_condexec_count--;
16929
16930 /* Nothing to do if we are already inside a conditional block. */
16931 if (arm_condexec_count)
16932 return;
16933
16934 if (GET_CODE (body) != COND_EXEC)
16935 return;
16936
16937 /* Conditional jumps are implemented directly. */
16938 if (GET_CODE (insn) == JUMP_INSN)
16939 return;
16940
16941 predicate = COND_EXEC_TEST (body);
16942 arm_current_cc = get_arm_condition_code (predicate);
16943
16944 n = get_attr_ce_count (insn);
16945 arm_condexec_count = 1;
16946 arm_condexec_mask = (1 << n) - 1;
16947 arm_condexec_masklen = n;
16948 /* See if subsequent instructions can be combined into the same block. */
16949 for (;;)
16950 {
16951 insn = next_nonnote_insn (insn);
16952
16953 /* Jumping into the middle of an IT block is illegal, so a label or
16954 barrier terminates the block. */
16955 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
16956 break;
16957
16958 body = PATTERN (insn);
16959 /* USE and CLOBBER aren't really insns, so just skip them. */
16960 if (GET_CODE (body) == USE
16961 || GET_CODE (body) == CLOBBER)
16962 continue;
16963
16964 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16965 if (GET_CODE (body) != COND_EXEC)
16966 break;
16967 /* Allow up to 4 conditionally executed instructions in a block. */
16968 n = get_attr_ce_count (insn);
16969 if (arm_condexec_masklen + n > 4)
16970 break;
16971
16972 predicate = COND_EXEC_TEST (body);
16973 code = get_arm_condition_code (predicate);
16974 mask = (1 << n) - 1;
16975 if (arm_current_cc == code)
16976 arm_condexec_mask |= (mask << arm_condexec_masklen);
16977 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16978 break;
16979
16980 arm_condexec_count++;
16981 arm_condexec_masklen += n;
16982
16983 /* A jump must be the last instruction in a conditional block. */
16984 if (GET_CODE(insn) == JUMP_INSN)
16985 break;
16986 }
16987 /* Restore recog_data (getting the attributes of other insns can
16988 destroy this array, but final.c assumes that it remains intact
16989 across this call). */
16990 extract_constrain_insn_cached (first_insn);
16991 }
16992
16993 void
16994 arm_final_prescan_insn (rtx insn)
16995 {
16996 /* BODY will hold the body of INSN. */
16997 rtx body = PATTERN (insn);
16998
16999 /* This will be 1 if trying to repeat the trick, and things need to be
17000 reversed if it appears to fail. */
17001 int reverse = 0;
17002
17003 /* If we start with a return insn, we only succeed if we find another one. */
17004 int seeking_return = 0;
17005
17006 /* START_INSN will hold the insn from where we start looking. This is the
17007 first insn after the following code_label if REVERSE is true. */
17008 rtx start_insn = insn;
17009
17010 /* If in state 4, check if the target branch is reached, in order to
17011 change back to state 0. */
17012 if (arm_ccfsm_state == 4)
17013 {
17014 if (insn == arm_target_insn)
17015 {
17016 arm_target_insn = NULL;
17017 arm_ccfsm_state = 0;
17018 }
17019 return;
17020 }
17021
17022 /* If in state 3, it is possible to repeat the trick, if this insn is an
17023 unconditional branch to a label, and immediately following this branch
17024 is the previous target label which is only used once, and the label this
17025 branch jumps to is not too far off. */
17026 if (arm_ccfsm_state == 3)
17027 {
17028 if (simplejump_p (insn))
17029 {
17030 start_insn = next_nonnote_insn (start_insn);
17031 if (GET_CODE (start_insn) == BARRIER)
17032 {
17033 /* XXX Isn't this always a barrier? */
17034 start_insn = next_nonnote_insn (start_insn);
17035 }
17036 if (GET_CODE (start_insn) == CODE_LABEL
17037 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17038 && LABEL_NUSES (start_insn) == 1)
17039 reverse = TRUE;
17040 else
17041 return;
17042 }
17043 else if (GET_CODE (body) == RETURN)
17044 {
17045 start_insn = next_nonnote_insn (start_insn);
17046 if (GET_CODE (start_insn) == BARRIER)
17047 start_insn = next_nonnote_insn (start_insn);
17048 if (GET_CODE (start_insn) == CODE_LABEL
17049 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17050 && LABEL_NUSES (start_insn) == 1)
17051 {
17052 reverse = TRUE;
17053 seeking_return = 1;
17054 }
17055 else
17056 return;
17057 }
17058 else
17059 return;
17060 }
17061
17062 gcc_assert (!arm_ccfsm_state || reverse);
17063 if (GET_CODE (insn) != JUMP_INSN)
17064 return;
17065
 17066 /* This jump might be paralleled with a clobber of the condition codes;
 17067 the jump should always come first.  */
17068 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17069 body = XVECEXP (body, 0, 0);
17070
17071 if (reverse
17072 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17073 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17074 {
17075 int insns_skipped;
17076 int fail = FALSE, succeed = FALSE;
17077 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17078 int then_not_else = TRUE;
17079 rtx this_insn = start_insn, label = 0;
17080
17081 /* Register the insn jumped to. */
17082 if (reverse)
17083 {
17084 if (!seeking_return)
17085 label = XEXP (SET_SRC (body), 0);
17086 }
17087 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17088 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17089 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17090 {
17091 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17092 then_not_else = FALSE;
17093 }
17094 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17095 seeking_return = 1;
17096 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17097 {
17098 seeking_return = 1;
17099 then_not_else = FALSE;
17100 }
17101 else
17102 gcc_unreachable ();
17103
17104 /* See how many insns this branch skips, and what kind of insns. If all
17105 insns are okay, and the label or unconditional branch to the same
17106 label is not too far away, succeed. */
17107 for (insns_skipped = 0;
17108 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17109 {
17110 rtx scanbody;
17111
17112 this_insn = next_nonnote_insn (this_insn);
17113 if (!this_insn)
17114 break;
17115
17116 switch (GET_CODE (this_insn))
17117 {
17118 case CODE_LABEL:
17119 /* Succeed if it is the target label, otherwise fail since
17120 control falls in from somewhere else. */
17121 if (this_insn == label)
17122 {
17123 arm_ccfsm_state = 1;
17124 succeed = TRUE;
17125 }
17126 else
17127 fail = TRUE;
17128 break;
17129
17130 case BARRIER:
17131 /* Succeed if the following insn is the target label.
17132 Otherwise fail.
17133 If return insns are used then the last insn in a function
17134 will be a barrier. */
17135 this_insn = next_nonnote_insn (this_insn);
17136 if (this_insn && this_insn == label)
17137 {
17138 arm_ccfsm_state = 1;
17139 succeed = TRUE;
17140 }
17141 else
17142 fail = TRUE;
17143 break;
17144
17145 case CALL_INSN:
17146 /* The AAPCS says that conditional calls should not be
17147 used since they make interworking inefficient (the
17148 linker can't transform BL<cond> into BLX). That's
17149 only a problem if the machine has BLX. */
17150 if (arm_arch5)
17151 {
17152 fail = TRUE;
17153 break;
17154 }
17155
17156 /* Succeed if the following insn is the target label, or
17157 if the following two insns are a barrier and the
17158 target label. */
17159 this_insn = next_nonnote_insn (this_insn);
17160 if (this_insn && GET_CODE (this_insn) == BARRIER)
17161 this_insn = next_nonnote_insn (this_insn);
17162
17163 if (this_insn && this_insn == label
17164 && insns_skipped < max_insns_skipped)
17165 {
17166 arm_ccfsm_state = 1;
17167 succeed = TRUE;
17168 }
17169 else
17170 fail = TRUE;
17171 break;
17172
17173 case JUMP_INSN:
17174 /* If this is an unconditional branch to the same label, succeed.
17175 If it is to another label, do nothing. If it is conditional,
17176 fail. */
17177 /* XXX Probably, the tests for SET and the PC are
17178 unnecessary. */
17179
17180 scanbody = PATTERN (this_insn);
17181 if (GET_CODE (scanbody) == SET
17182 && GET_CODE (SET_DEST (scanbody)) == PC)
17183 {
17184 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17185 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17186 {
17187 arm_ccfsm_state = 2;
17188 succeed = TRUE;
17189 }
17190 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17191 fail = TRUE;
17192 }
17193 /* Fail if a conditional return is undesirable (e.g. on a
17194 StrongARM), but still allow this if optimizing for size. */
17195 else if (GET_CODE (scanbody) == RETURN
17196 && !use_return_insn (TRUE, NULL)
17197 && !optimize_size)
17198 fail = TRUE;
17199 else if (GET_CODE (scanbody) == RETURN
17200 && seeking_return)
17201 {
17202 arm_ccfsm_state = 2;
17203 succeed = TRUE;
17204 }
17205 else if (GET_CODE (scanbody) == PARALLEL)
17206 {
17207 switch (get_attr_conds (this_insn))
17208 {
17209 case CONDS_NOCOND:
17210 break;
17211 default:
17212 fail = TRUE;
17213 break;
17214 }
17215 }
17216 else
17217 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17218
17219 break;
17220
17221 case INSN:
17222 /* Instructions using or affecting the condition codes make it
17223 fail. */
17224 scanbody = PATTERN (this_insn);
17225 if (!(GET_CODE (scanbody) == SET
17226 || GET_CODE (scanbody) == PARALLEL)
17227 || get_attr_conds (this_insn) != CONDS_NOCOND)
17228 fail = TRUE;
17229
 17230 /* A conditional Cirrus instruction must be followed by
 17231 a non-Cirrus instruction.  However, since we
 17232 conditionalize instructions in this function, and
 17233 since by the time we get here we cannot add
 17234 instructions (nops) because shorten_branches() has
 17235 already been called, we disable conditionalizing
 17236 Cirrus instructions to be safe. */
17237 if (GET_CODE (scanbody) != USE
17238 && GET_CODE (scanbody) != CLOBBER
17239 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17240 fail = TRUE;
17241 break;
17242
17243 default:
17244 break;
17245 }
17246 }
17247 if (succeed)
17248 {
17249 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17250 arm_target_label = CODE_LABEL_NUMBER (label);
17251 else
17252 {
17253 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17254
17255 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17256 {
17257 this_insn = next_nonnote_insn (this_insn);
17258 gcc_assert (!this_insn
17259 || (GET_CODE (this_insn) != BARRIER
17260 && GET_CODE (this_insn) != CODE_LABEL));
17261 }
17262 if (!this_insn)
17263 {
 17264 /* Oh dear!  We ran off the end; give up. */
17265 extract_constrain_insn_cached (insn);
17266 arm_ccfsm_state = 0;
17267 arm_target_insn = NULL;
17268 return;
17269 }
17270 arm_target_insn = this_insn;
17271 }
17272
17273 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17274 what it was. */
17275 if (!reverse)
17276 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17277
17278 if (reverse || then_not_else)
17279 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17280 }
17281
17282 /* Restore recog_data (getting the attributes of other insns can
17283 destroy this array, but final.c assumes that it remains intact
 17284 across this call).  */
17285 extract_constrain_insn_cached (insn);
17286 }
17287 }
17288
17289 /* Output IT instructions. */
17290 void
17291 thumb2_asm_output_opcode (FILE * stream)
17292 {
17293 char buff[5];
17294 int n;
17295
17296 if (arm_condexec_mask)
17297 {
17298 for (n = 0; n < arm_condexec_masklen; n++)
17299 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17300 buff[n] = 0;
17301 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17302 arm_condition_codes[arm_current_cc]);
17303 arm_condexec_mask = 0;
17304 }
17305 }
17306
17307 /* Returns true if REGNO is a valid register
17308 for holding a quantity of type MODE. */
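 /* For example (illustrative): with TARGET_LDRD a DImode value is only
 accepted in an even-numbered core register, so that ldrd/strd can be
 used, and SImode is never accepted in a Cirrus register -- see the
 corresponding checks below.  */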
17309 int
17310 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17311 {
17312 if (GET_MODE_CLASS (mode) == MODE_CC)
17313 return (regno == CC_REGNUM
17314 || (TARGET_HARD_FLOAT && TARGET_VFP
17315 && regno == VFPCC_REGNUM));
17316
17317 if (TARGET_THUMB1)
17318 /* For the Thumb we only allow values bigger than SImode in
17319 registers 0 - 6, so that there is always a second low
17320 register available to hold the upper part of the value.
 17321 We probably ought to ensure that the register is the
 17322 start of an even-numbered register pair. */
17323 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17324
17325 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17326 && IS_CIRRUS_REGNUM (regno))
17327 /* We have outlawed SI values in Cirrus registers because they
17328 reside in the lower 32 bits, but SF values reside in the
17329 upper 32 bits. This causes gcc all sorts of grief. We can't
17330 even split the registers into pairs because Cirrus SI values
 17331 get sign-extended to 64 bits -- aldyh. */
17332 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17333
17334 if (TARGET_HARD_FLOAT && TARGET_VFP
17335 && IS_VFP_REGNUM (regno))
17336 {
17337 if (mode == SFmode || mode == SImode)
17338 return VFP_REGNO_OK_FOR_SINGLE (regno);
17339
17340 if (mode == DFmode)
17341 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17342
17343 /* VFP registers can hold HFmode values, but there is no point in
17344 putting them there unless we have hardware conversion insns. */
17345 if (mode == HFmode)
17346 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17347
17348 if (TARGET_NEON)
17349 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17350 || (VALID_NEON_QREG_MODE (mode)
17351 && NEON_REGNO_OK_FOR_QUAD (regno))
17352 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17353 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17354 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17355 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17356 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17357
17358 return FALSE;
17359 }
17360
17361 if (TARGET_REALLY_IWMMXT)
17362 {
17363 if (IS_IWMMXT_GR_REGNUM (regno))
17364 return mode == SImode;
17365
17366 if (IS_IWMMXT_REGNUM (regno))
17367 return VALID_IWMMXT_REG_MODE (mode);
17368 }
17369
17370 /* We allow almost any value to be stored in the general registers.
17371 Restrict doubleword quantities to even register pairs so that we can
17372 use ldrd. Do not allow very large Neon structure opaque modes in
17373 general registers; they would use too many. */
17374 if (regno <= LAST_ARM_REGNUM)
17375 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17376 && ARM_NUM_REGS (mode) <= 4;
17377
17378 if (regno == FRAME_POINTER_REGNUM
17379 || regno == ARG_POINTER_REGNUM)
17380 /* We only allow integers in the fake hard registers. */
17381 return GET_MODE_CLASS (mode) == MODE_INT;
17382
17383 /* The only registers left are the FPA registers
17384 which we only allow to hold FP values. */
17385 return (TARGET_HARD_FLOAT && TARGET_FPA
17386 && GET_MODE_CLASS (mode) == MODE_FLOAT
17387 && regno >= FIRST_FPA_REGNUM
17388 && regno <= LAST_FPA_REGNUM);
17389 }
17390
 17391 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
 17392 not used in ARM mode. */
17393
17394 enum reg_class
17395 arm_regno_class (int regno)
17396 {
17397 if (TARGET_THUMB1)
17398 {
17399 if (regno == STACK_POINTER_REGNUM)
17400 return STACK_REG;
17401 if (regno == CC_REGNUM)
17402 return CC_REG;
17403 if (regno < 8)
17404 return LO_REGS;
17405 return HI_REGS;
17406 }
17407
17408 if (TARGET_THUMB2 && regno < 8)
17409 return LO_REGS;
17410
17411 if ( regno <= LAST_ARM_REGNUM
17412 || regno == FRAME_POINTER_REGNUM
17413 || regno == ARG_POINTER_REGNUM)
17414 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17415
17416 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17417 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17418
17419 if (IS_CIRRUS_REGNUM (regno))
17420 return CIRRUS_REGS;
17421
17422 if (IS_VFP_REGNUM (regno))
17423 {
17424 if (regno <= D7_VFP_REGNUM)
17425 return VFP_D0_D7_REGS;
17426 else if (regno <= LAST_LO_VFP_REGNUM)
17427 return VFP_LO_REGS;
17428 else
17429 return VFP_HI_REGS;
17430 }
17431
17432 if (IS_IWMMXT_REGNUM (regno))
17433 return IWMMXT_REGS;
17434
17435 if (IS_IWMMXT_GR_REGNUM (regno))
17436 return IWMMXT_GR_REGS;
17437
17438 return FPA_REGS;
17439 }
17440
17441 /* Handle a special case when computing the offset
17442 of an argument from the frame pointer. */
17443 int
17444 arm_debugger_arg_offset (int value, rtx addr)
17445 {
17446 rtx insn;
17447
 17448 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
17449 if (value != 0)
17450 return 0;
17451
17452 /* We can only cope with the case where the address is held in a register. */
17453 if (GET_CODE (addr) != REG)
17454 return 0;
17455
17456 /* If we are using the frame pointer to point at the argument, then
17457 an offset of 0 is correct. */
17458 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17459 return 0;
17460
17461 /* If we are using the stack pointer to point at the
17462 argument, then an offset of 0 is correct. */
17463 /* ??? Check this is consistent with thumb2 frame layout. */
17464 if ((TARGET_THUMB || !frame_pointer_needed)
17465 && REGNO (addr) == SP_REGNUM)
17466 return 0;
17467
17468 /* Oh dear. The argument is pointed to by a register rather
17469 than being held in a register, or being stored at a known
17470 offset from the frame pointer. Since GDB only understands
17471 those two kinds of argument we must translate the address
17472 held in the register into an offset from the frame pointer.
17473 We do this by searching through the insns for the function
17474 looking to see where this register gets its value. If the
17475 register is initialized from the frame pointer plus an offset
17476 then we are in luck and we can continue, otherwise we give up.
17477
17478 This code is exercised by producing debugging information
17479 for a function with arguments like this:
17480
17481 double func (double a, double b, int c, double d) {return d;}
17482
17483 Without this code the stab for parameter 'd' will be set to
17484 an offset of 0 from the frame pointer, rather than 8. */
17485
17486 /* The if() statement says:
17487
17488 If the insn is a normal instruction
17489 and if the insn is setting the value in a register
17490 and if the register being set is the register holding the address of the argument
 17491 and if the address is computed by an addition
17492 that involves adding to a register
17493 which is the frame pointer
17494 a constant integer
17495
17496 then... */
17497
17498 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17499 {
17500 if ( GET_CODE (insn) == INSN
17501 && GET_CODE (PATTERN (insn)) == SET
17502 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17503 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17504 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17505 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17506 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17507 )
17508 {
17509 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17510
17511 break;
17512 }
17513 }
17514
17515 if (value == 0)
17516 {
17517 debug_rtx (addr);
17518 warning (0, "unable to compute real location of stacked parameter");
17519 value = 8; /* XXX magic hack */
17520 }
17521
17522 return value;
17523 }
17524 \f
17525 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17526 do \
17527 { \
17528 if ((MASK) & insn_flags) \
17529 add_builtin_function ((NAME), (TYPE), (CODE), \
17530 BUILT_IN_MD, NULL, NULL_TREE); \
17531 } \
17532 while (0)
17533
17534 struct builtin_description
17535 {
17536 const unsigned int mask;
17537 const enum insn_code icode;
17538 const char * const name;
17539 const enum arm_builtins code;
17540 const enum rtx_code comparison;
17541 const unsigned int flag;
17542 };
17543
17544 static const struct builtin_description bdesc_2arg[] =
17545 {
17546 #define IWMMXT_BUILTIN(code, string, builtin) \
17547 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17548 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17549
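 /* For reference (illustrative), the first entry below expands to
    { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
      ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */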
17550 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17551 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17552 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17553 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17554 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17555 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17556 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17557 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17558 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17559 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17560 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17561 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17562 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17563 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17564 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17565 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17566 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17567 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17568 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17569 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17570 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17571 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17572 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17573 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17574 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17575 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17576 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17577 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17578 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17579 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17580 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17581 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17582 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17583 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17584 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17585 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17586 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17587 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17588 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17589 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17590 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17591 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17592 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17593 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17594 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17595 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17596 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17597 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17598 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17599 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17600 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17601 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17602 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17603 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17604 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17605 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17606 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17607 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17608
17609 #define IWMMXT_BUILTIN2(code, builtin) \
17610 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17611
17612 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17613 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17614 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17615 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17616 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17617 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17618 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17619 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17620 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17621 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17622 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17623 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17624 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17625 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17626 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17627 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17628 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17629 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17630 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17631 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17632 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17633 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17634 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17635 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17636 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17637 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17638 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17639 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17640 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17641 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17642 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17643 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17644 };
17645
17646 static const struct builtin_description bdesc_1arg[] =
17647 {
17648 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17649 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17650 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17651 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17652 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17653 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17654 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17655 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17656 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17657 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17658 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17659 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17660 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17661 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17662 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17663 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17664 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17665 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17666 };
17667
17668 /* Set up all the iWMMXt builtins. This is
17669 not called if TARGET_IWMMXT is zero. */
17670
17671 static void
17672 arm_init_iwmmxt_builtins (void)
17673 {
17674 const struct builtin_description * d;
17675 size_t i;
17676 tree endlink = void_list_node;
17677
17678 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17679 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17680 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17681
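 /* The *_ftype_* trees below read "return type, then argument types";
 e.g. v4hi_ftype_v4hi_int is the type of a function taking a V4HI
 vector and an int and returning a V4HI vector.  */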
17682 tree int_ftype_int
17683 = build_function_type (integer_type_node,
17684 tree_cons (NULL_TREE, integer_type_node, endlink));
17685 tree v8qi_ftype_v8qi_v8qi_int
17686 = build_function_type (V8QI_type_node,
17687 tree_cons (NULL_TREE, V8QI_type_node,
17688 tree_cons (NULL_TREE, V8QI_type_node,
17689 tree_cons (NULL_TREE,
17690 integer_type_node,
17691 endlink))));
17692 tree v4hi_ftype_v4hi_int
17693 = build_function_type (V4HI_type_node,
17694 tree_cons (NULL_TREE, V4HI_type_node,
17695 tree_cons (NULL_TREE, integer_type_node,
17696 endlink)));
17697 tree v2si_ftype_v2si_int
17698 = build_function_type (V2SI_type_node,
17699 tree_cons (NULL_TREE, V2SI_type_node,
17700 tree_cons (NULL_TREE, integer_type_node,
17701 endlink)));
17702 tree v2si_ftype_di_di
17703 = build_function_type (V2SI_type_node,
17704 tree_cons (NULL_TREE, long_long_integer_type_node,
17705 tree_cons (NULL_TREE, long_long_integer_type_node,
17706 endlink)));
17707 tree di_ftype_di_int
17708 = build_function_type (long_long_integer_type_node,
17709 tree_cons (NULL_TREE, long_long_integer_type_node,
17710 tree_cons (NULL_TREE, integer_type_node,
17711 endlink)));
17712 tree di_ftype_di_int_int
17713 = build_function_type (long_long_integer_type_node,
17714 tree_cons (NULL_TREE, long_long_integer_type_node,
17715 tree_cons (NULL_TREE, integer_type_node,
17716 tree_cons (NULL_TREE,
17717 integer_type_node,
17718 endlink))));
17719 tree int_ftype_v8qi
17720 = build_function_type (integer_type_node,
17721 tree_cons (NULL_TREE, V8QI_type_node,
17722 endlink));
17723 tree int_ftype_v4hi
17724 = build_function_type (integer_type_node,
17725 tree_cons (NULL_TREE, V4HI_type_node,
17726 endlink));
17727 tree int_ftype_v2si
17728 = build_function_type (integer_type_node,
17729 tree_cons (NULL_TREE, V2SI_type_node,
17730 endlink));
17731 tree int_ftype_v8qi_int
17732 = build_function_type (integer_type_node,
17733 tree_cons (NULL_TREE, V8QI_type_node,
17734 tree_cons (NULL_TREE, integer_type_node,
17735 endlink)));
17736 tree int_ftype_v4hi_int
17737 = build_function_type (integer_type_node,
17738 tree_cons (NULL_TREE, V4HI_type_node,
17739 tree_cons (NULL_TREE, integer_type_node,
17740 endlink)));
17741 tree int_ftype_v2si_int
17742 = build_function_type (integer_type_node,
17743 tree_cons (NULL_TREE, V2SI_type_node,
17744 tree_cons (NULL_TREE, integer_type_node,
17745 endlink)));
17746 tree v8qi_ftype_v8qi_int_int
17747 = build_function_type (V8QI_type_node,
17748 tree_cons (NULL_TREE, V8QI_type_node,
17749 tree_cons (NULL_TREE, integer_type_node,
17750 tree_cons (NULL_TREE,
17751 integer_type_node,
17752 endlink))));
17753 tree v4hi_ftype_v4hi_int_int
17754 = build_function_type (V4HI_type_node,
17755 tree_cons (NULL_TREE, V4HI_type_node,
17756 tree_cons (NULL_TREE, integer_type_node,
17757 tree_cons (NULL_TREE,
17758 integer_type_node,
17759 endlink))));
17760 tree v2si_ftype_v2si_int_int
17761 = build_function_type (V2SI_type_node,
17762 tree_cons (NULL_TREE, V2SI_type_node,
17763 tree_cons (NULL_TREE, integer_type_node,
17764 tree_cons (NULL_TREE,
17765 integer_type_node,
17766 endlink))));
17767 /* Miscellaneous. */
17768 tree v8qi_ftype_v4hi_v4hi
17769 = build_function_type (V8QI_type_node,
17770 tree_cons (NULL_TREE, V4HI_type_node,
17771 tree_cons (NULL_TREE, V4HI_type_node,
17772 endlink)));
17773 tree v4hi_ftype_v2si_v2si
17774 = build_function_type (V4HI_type_node,
17775 tree_cons (NULL_TREE, V2SI_type_node,
17776 tree_cons (NULL_TREE, V2SI_type_node,
17777 endlink)));
17778 tree v2si_ftype_v4hi_v4hi
17779 = build_function_type (V2SI_type_node,
17780 tree_cons (NULL_TREE, V4HI_type_node,
17781 tree_cons (NULL_TREE, V4HI_type_node,
17782 endlink)));
17783 tree v2si_ftype_v8qi_v8qi
17784 = build_function_type (V2SI_type_node,
17785 tree_cons (NULL_TREE, V8QI_type_node,
17786 tree_cons (NULL_TREE, V8QI_type_node,
17787 endlink)));
17788 tree v4hi_ftype_v4hi_di
17789 = build_function_type (V4HI_type_node,
17790 tree_cons (NULL_TREE, V4HI_type_node,
17791 tree_cons (NULL_TREE,
17792 long_long_integer_type_node,
17793 endlink)));
17794 tree v2si_ftype_v2si_di
17795 = build_function_type (V2SI_type_node,
17796 tree_cons (NULL_TREE, V2SI_type_node,
17797 tree_cons (NULL_TREE,
17798 long_long_integer_type_node,
17799 endlink)));
17800 tree void_ftype_int_int
17801 = build_function_type (void_type_node,
17802 tree_cons (NULL_TREE, integer_type_node,
17803 tree_cons (NULL_TREE, integer_type_node,
17804 endlink)));
17805 tree di_ftype_void
17806 = build_function_type (long_long_unsigned_type_node, endlink);
17807 tree di_ftype_v8qi
17808 = build_function_type (long_long_integer_type_node,
17809 tree_cons (NULL_TREE, V8QI_type_node,
17810 endlink));
17811 tree di_ftype_v4hi
17812 = build_function_type (long_long_integer_type_node,
17813 tree_cons (NULL_TREE, V4HI_type_node,
17814 endlink));
17815 tree di_ftype_v2si
17816 = build_function_type (long_long_integer_type_node,
17817 tree_cons (NULL_TREE, V2SI_type_node,
17818 endlink));
17819 tree v2si_ftype_v4hi
17820 = build_function_type (V2SI_type_node,
17821 tree_cons (NULL_TREE, V4HI_type_node,
17822 endlink));
17823 tree v4hi_ftype_v8qi
17824 = build_function_type (V4HI_type_node,
17825 tree_cons (NULL_TREE, V8QI_type_node,
17826 endlink));
17827
17828 tree di_ftype_di_v4hi_v4hi
17829 = build_function_type (long_long_unsigned_type_node,
17830 tree_cons (NULL_TREE,
17831 long_long_unsigned_type_node,
17832 tree_cons (NULL_TREE, V4HI_type_node,
17833 tree_cons (NULL_TREE,
17834 V4HI_type_node,
17835 endlink))));
17836
17837 tree di_ftype_v4hi_v4hi
17838 = build_function_type (long_long_unsigned_type_node,
17839 tree_cons (NULL_TREE, V4HI_type_node,
17840 tree_cons (NULL_TREE, V4HI_type_node,
17841 endlink)));
17842
17843 /* Normal vector binops. */
17844 tree v8qi_ftype_v8qi_v8qi
17845 = build_function_type (V8QI_type_node,
17846 tree_cons (NULL_TREE, V8QI_type_node,
17847 tree_cons (NULL_TREE, V8QI_type_node,
17848 endlink)));
17849 tree v4hi_ftype_v4hi_v4hi
17850 = build_function_type (V4HI_type_node,
17851 tree_cons (NULL_TREE, V4HI_type_node,
17852 tree_cons (NULL_TREE, V4HI_type_node,
17853 endlink)));
17854 tree v2si_ftype_v2si_v2si
17855 = build_function_type (V2SI_type_node,
17856 tree_cons (NULL_TREE, V2SI_type_node,
17857 tree_cons (NULL_TREE, V2SI_type_node,
17858 endlink)));
17859 tree di_ftype_di_di
17860 = build_function_type (long_long_unsigned_type_node,
17861 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17862 tree_cons (NULL_TREE,
17863 long_long_unsigned_type_node,
17864 endlink)));
17865
17866 /* Add all builtins that are more or less simple operations on two
17867 operands. */
17868 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17869 {
17870 /* Use one of the operands; the target can have a different mode for
17871 mask-generating compares. */
17872 enum machine_mode mode;
17873 tree type;
17874
17875 if (d->name == 0)
17876 continue;
17877
17878 mode = insn_data[d->icode].operand[1].mode;
17879
17880 switch (mode)
17881 {
17882 case V8QImode:
17883 type = v8qi_ftype_v8qi_v8qi;
17884 break;
17885 case V4HImode:
17886 type = v4hi_ftype_v4hi_v4hi;
17887 break;
17888 case V2SImode:
17889 type = v2si_ftype_v2si_v2si;
17890 break;
17891 case DImode:
17892 type = di_ftype_di_di;
17893 break;
17894
17895 default:
17896 gcc_unreachable ();
17897 }
17898
17899 def_mbuiltin (d->mask, d->name, type, d->code);
17900 }
17901
17902 /* Add the remaining MMX insns with somewhat more complicated types. */
17903 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17904 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17905 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
17906
17907 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17908 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17909 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17910 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17911 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17912 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17913
17914 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17915 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17916 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17917 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17918 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17919 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17920
17921 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17922 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17923 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17924 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17925 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17926 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17927
17928 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17929 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17930 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17931 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17932 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17933 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17934
17935 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
17936
17937 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17938 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17939 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17940 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
17941
17942 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17943 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17944 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17945 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17946 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17947 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17948 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17949 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17950 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17951
17952 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17953 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17954 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17955
17956 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17957 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17958 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17959
17960 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17961 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17962 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17963 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17964 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17965 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17966
17967 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17968 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17969 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17970 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17971 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17972 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17973 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17974 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17975 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17976 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17977 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17978 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17979
17980 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17981 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17982 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17983 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17984
17985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17986 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17987 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17988 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17989 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17990 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17991 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17992 }
17993
17994 static void
17995 arm_init_tls_builtins (void)
17996 {
17997 tree ftype, decl;
17998
17999 ftype = build_function_type (ptr_type_node, void_list_node);
18000 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
18001 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
18002 NULL, NULL_TREE);
18003 TREE_NOTHROW (decl) = 1;
18004 TREE_READONLY (decl) = 1;
18005 }
18006
18007 enum neon_builtin_type_bits {
18008 T_V8QI = 0x0001,
18009 T_V4HI = 0x0002,
18010 T_V2SI = 0x0004,
18011 T_V2SF = 0x0008,
18012 T_DI = 0x0010,
18013 T_V16QI = 0x0020,
18014 T_V8HI = 0x0040,
18015 T_V4SI = 0x0080,
18016 T_V4SF = 0x0100,
18017 T_V2DI = 0x0200,
18018 T_TI = 0x0400,
18019 T_EI = 0x0800,
18020 T_OI = 0x1000
18021 };
18022
18023 #define v8qi_UP T_V8QI
18024 #define v4hi_UP T_V4HI
18025 #define v2si_UP T_V2SI
18026 #define v2sf_UP T_V2SF
18027 #define di_UP T_DI
18028 #define v16qi_UP T_V16QI
18029 #define v8hi_UP T_V8HI
18030 #define v4si_UP T_V4SI
18031 #define v4sf_UP T_V4SF
18032 #define v2di_UP T_V2DI
18033 #define ti_UP T_TI
18034 #define ei_UP T_EI
18035 #define oi_UP T_OI
18036
18037 #define UP(X) X##_UP
18038
18039 #define T_MAX 13
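 /* T_MAX is the number of type bits defined in neon_builtin_type_bits
 above (T_V8QI through T_OI), and hence the maximum number of insn code
 variants a single entry can carry.  */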
18040
18041 typedef enum {
18042 NEON_BINOP,
18043 NEON_TERNOP,
18044 NEON_UNOP,
18045 NEON_GETLANE,
18046 NEON_SETLANE,
18047 NEON_CREATE,
18048 NEON_DUP,
18049 NEON_DUPLANE,
18050 NEON_COMBINE,
18051 NEON_SPLIT,
18052 NEON_LANEMUL,
18053 NEON_LANEMULL,
18054 NEON_LANEMULH,
18055 NEON_LANEMAC,
18056 NEON_SCALARMUL,
18057 NEON_SCALARMULL,
18058 NEON_SCALARMULH,
18059 NEON_SCALARMAC,
18060 NEON_CONVERT,
18061 NEON_FIXCONV,
18062 NEON_SELECT,
18063 NEON_RESULTPAIR,
18064 NEON_REINTERP,
18065 NEON_VTBL,
18066 NEON_VTBX,
18067 NEON_LOAD1,
18068 NEON_LOAD1LANE,
18069 NEON_STORE1,
18070 NEON_STORE1LANE,
18071 NEON_LOADSTRUCT,
18072 NEON_LOADSTRUCTLANE,
18073 NEON_STORESTRUCT,
18074 NEON_STORESTRUCTLANE,
18075 NEON_LOGICBINOP,
18076 NEON_SHIFTINSERT,
18077 NEON_SHIFTIMM,
18078 NEON_SHIFTACC
18079 } neon_itype;
18080
18081 typedef struct {
18082 const char *name;
18083 const neon_itype itype;
18084 const int bits;
18085 const enum insn_code codes[T_MAX];
18086 const unsigned int num_vars;
18087 unsigned int base_fcode;
18088 } neon_builtin_datum;
18089
18090 #define CF(N,X) CODE_FOR_neon_##N##X
18091
18092 #define VAR1(T, N, A) \
18093 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18094 #define VAR2(T, N, A, B) \
18095 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18096 #define VAR3(T, N, A, B, C) \
18097 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18098 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18099 #define VAR4(T, N, A, B, C, D) \
18100 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18101 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18102 #define VAR5(T, N, A, B, C, D, E) \
18103 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18104 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18105 #define VAR6(T, N, A, B, C, D, E, F) \
18106 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18107 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18108 #define VAR7(T, N, A, B, C, D, E, F, G) \
18109 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18110 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18111 CF (N, G) }, 7, 0
18112 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18113 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18114 | UP (H), \
18115 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18116 CF (N, G), CF (N, H) }, 8, 0
18117 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18118 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18119 | UP (H) | UP (I), \
18120 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18121 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18122 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18123 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18124 | UP (H) | UP (I) | UP (J), \
18125 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18126 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
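 /* As an illustration, VAR2 (TERNOP, vqdmlal, v4hi, v2si) expands to
      "vqdmlal", NEON_TERNOP, T_V4HI | T_V2SI,
      { CODE_FOR_neon_vqdmlalv4hi, CODE_FOR_neon_vqdmlalv2si }, 2, 0
    i.e. the name, itype, type-bit mask, per-mode insn codes and variant
    count of one neon_builtin_datum entry.  */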
18127
18128 /* The mode entries in the following table correspond to the "key" type of the
18129 instruction variant, i.e. equivalent to that which would be specified after
18130 the assembler mnemonic, which usually refers to the last vector operand.
 18131 (Signed/unsigned/polynomial types are not differentiated, though; they
 18132 are all mapped onto the same mode for a given element size.)  The modes
18133 listed per instruction should be the same as those defined for that
18134 instruction's pattern in neon.md.
18135 WARNING: Variants should be listed in the same increasing order as
18136 neon_builtin_type_bits. */
18137
18138 static neon_builtin_datum neon_builtin_data[] =
18139 {
18140 { VAR10 (BINOP, vadd,
18141 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18142 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18143 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18144 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18145 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18146 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18147 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18148 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18149 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18150 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18151 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18152 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18153 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18154 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18155 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18156 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18157 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18158 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18159 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18160 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18161 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18162 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18163 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18164 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18165 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18166 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18167 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18168 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18169 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18170 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18171 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18172 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18173 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18174 { VAR10 (BINOP, vsub,
18175 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18176 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18177 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18178 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18179 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18180 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18181 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18182 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18183 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18184 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18185 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18186 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18187 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18188 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18189 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18190 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18191 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18192 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18193 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18194 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18195 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18196 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18197 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18198 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18199 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18200 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18201 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18202 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18203 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18204 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18205 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18206 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18207 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18208 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18209 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18210 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18211 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18212 /* FIXME: vget_lane supports more variants than this! */
18213 { VAR10 (GETLANE, vget_lane,
18214 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18215 { VAR10 (SETLANE, vset_lane,
18216 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18217 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18218 { VAR10 (DUP, vdup_n,
18219 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18220 { VAR10 (DUPLANE, vdup_lane,
18221 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18222 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18223 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18224 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18225 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18226 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18227 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18228 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18229 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18230 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18231 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18232 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18233 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18234 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18235 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18236 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18237 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18238 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18239 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18240 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18241 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18242 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18243 { VAR10 (BINOP, vext,
18244 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18245 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18246 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18247 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18248 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18249 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18250 { VAR10 (SELECT, vbsl,
18251 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18252 { VAR1 (VTBL, vtbl1, v8qi) },
18253 { VAR1 (VTBL, vtbl2, v8qi) },
18254 { VAR1 (VTBL, vtbl3, v8qi) },
18255 { VAR1 (VTBL, vtbl4, v8qi) },
18256 { VAR1 (VTBX, vtbx1, v8qi) },
18257 { VAR1 (VTBX, vtbx2, v8qi) },
18258 { VAR1 (VTBX, vtbx3, v8qi) },
18259 { VAR1 (VTBX, vtbx4, v8qi) },
18260 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18261 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18262 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18263 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18264 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18265 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18266 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18267 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18268 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18269 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18270 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18271 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18272 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18273 { VAR10 (LOAD1, vld1,
18274 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18275 { VAR10 (LOAD1LANE, vld1_lane,
18276 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18277 { VAR10 (LOAD1, vld1_dup,
18278 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18279 { VAR10 (STORE1, vst1,
18280 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18281 { VAR10 (STORE1LANE, vst1_lane,
18282 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18283 { VAR9 (LOADSTRUCT,
18284 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18285 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18286 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18287 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18288 { VAR9 (STORESTRUCT, vst2,
18289 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18290 { VAR7 (STORESTRUCTLANE, vst2_lane,
18291 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18292 { VAR9 (LOADSTRUCT,
18293 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18294 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18295 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18296 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18297 { VAR9 (STORESTRUCT, vst3,
18298 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18299 { VAR7 (STORESTRUCTLANE, vst3_lane,
18300 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18301 { VAR9 (LOADSTRUCT, vld4,
18302 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18303 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18304 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18305 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18306 { VAR9 (STORESTRUCT, vst4,
18307 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18308 { VAR7 (STORESTRUCTLANE, vst4_lane,
18309 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18310 { VAR10 (LOGICBINOP, vand,
18311 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18312 { VAR10 (LOGICBINOP, vorr,
18313 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18314 { VAR10 (BINOP, veor,
18315 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18316 { VAR10 (LOGICBINOP, vbic,
18317 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18318 { VAR10 (LOGICBINOP, vorn,
18319 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18320 };
18321
18322 #undef CF
18323 #undef VAR1
18324 #undef VAR2
18325 #undef VAR3
18326 #undef VAR4
18327 #undef VAR5
18328 #undef VAR6
18329 #undef VAR7
18330 #undef VAR8
18331 #undef VAR9
18332 #undef VAR10
18333
18334 static void
18335 arm_init_neon_builtins (void)
18336 {
18337 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18338
18339 tree neon_intQI_type_node;
18340 tree neon_intHI_type_node;
18341 tree neon_polyQI_type_node;
18342 tree neon_polyHI_type_node;
18343 tree neon_intSI_type_node;
18344 tree neon_intDI_type_node;
18345 tree neon_float_type_node;
18346
18347 tree intQI_pointer_node;
18348 tree intHI_pointer_node;
18349 tree intSI_pointer_node;
18350 tree intDI_pointer_node;
18351 tree float_pointer_node;
18352
18353 tree const_intQI_node;
18354 tree const_intHI_node;
18355 tree const_intSI_node;
18356 tree const_intDI_node;
18357 tree const_float_node;
18358
18359 tree const_intQI_pointer_node;
18360 tree const_intHI_pointer_node;
18361 tree const_intSI_pointer_node;
18362 tree const_intDI_pointer_node;
18363 tree const_float_pointer_node;
18364
18365 tree V8QI_type_node;
18366 tree V4HI_type_node;
18367 tree V2SI_type_node;
18368 tree V2SF_type_node;
18369 tree V16QI_type_node;
18370 tree V8HI_type_node;
18371 tree V4SI_type_node;
18372 tree V4SF_type_node;
18373 tree V2DI_type_node;
18374
18375 tree intUQI_type_node;
18376 tree intUHI_type_node;
18377 tree intUSI_type_node;
18378 tree intUDI_type_node;
18379
18380 tree intEI_type_node;
18381 tree intOI_type_node;
18382 tree intCI_type_node;
18383 tree intXI_type_node;
18384
18385 tree V8QI_pointer_node;
18386 tree V4HI_pointer_node;
18387 tree V2SI_pointer_node;
18388 tree V2SF_pointer_node;
18389 tree V16QI_pointer_node;
18390 tree V8HI_pointer_node;
18391 tree V4SI_pointer_node;
18392 tree V4SF_pointer_node;
18393 tree V2DI_pointer_node;
18394
18395 tree void_ftype_pv8qi_v8qi_v8qi;
18396 tree void_ftype_pv4hi_v4hi_v4hi;
18397 tree void_ftype_pv2si_v2si_v2si;
18398 tree void_ftype_pv2sf_v2sf_v2sf;
18399 tree void_ftype_pdi_di_di;
18400 tree void_ftype_pv16qi_v16qi_v16qi;
18401 tree void_ftype_pv8hi_v8hi_v8hi;
18402 tree void_ftype_pv4si_v4si_v4si;
18403 tree void_ftype_pv4sf_v4sf_v4sf;
18404 tree void_ftype_pv2di_v2di_v2di;
18405
18406 tree reinterp_ftype_dreg[5][5];
18407 tree reinterp_ftype_qreg[5][5];
18408 tree dreg_types[5], qreg_types[5];
18409
18410 /* Create distinguished type nodes for NEON vector element types,
18411 and pointers to values of such types, so we can detect them later. */
18412 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18413 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18414 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18415 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18416 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18417 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18418 neon_float_type_node = make_node (REAL_TYPE);
18419 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18420 layout_type (neon_float_type_node);
18421
18422 /* Define typedefs which exactly correspond to the modes we are basing vector
18423 types on. If you change these names you'll need to change
18424 the table used by arm_mangle_type too. */
18425 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18426 "__builtin_neon_qi");
18427 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18428 "__builtin_neon_hi");
18429 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18430 "__builtin_neon_si");
18431 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18432 "__builtin_neon_sf");
18433 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18434 "__builtin_neon_di");
18435 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18436 "__builtin_neon_poly8");
18437 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18438 "__builtin_neon_poly16");
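
  /* As an illustration (not part of this file): arm_neon.h builds the public
     NEON vector types on top of the element type names registered above,
     along the lines of

       typedef __builtin_neon_qi int8x8_t  __attribute__ ((vector_size (8)));
       typedef __builtin_neon_sf float32x4_t __attribute__ ((vector_size (16)));

     so these strings must stay in sync with arm_neon.h and with the mangling
     table used by arm_mangle_type.  */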
18439
18440 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18441 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18442 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18443 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18444 float_pointer_node = build_pointer_type (neon_float_type_node);
18445
18446 /* Next create constant-qualified versions of the above types. */
18447 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18448 TYPE_QUAL_CONST);
18449 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18450 TYPE_QUAL_CONST);
18451 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18452 TYPE_QUAL_CONST);
18453 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18454 TYPE_QUAL_CONST);
18455 const_float_node = build_qualified_type (neon_float_type_node,
18456 TYPE_QUAL_CONST);
18457
18458 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18459 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18460 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18461 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18462 const_float_pointer_node = build_pointer_type (const_float_node);
18463
18464 /* Now create vector types based on our NEON element types. */
18465 /* 64-bit vectors. */
18466 V8QI_type_node =
18467 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18468 V4HI_type_node =
18469 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18470 V2SI_type_node =
18471 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18472 V2SF_type_node =
18473 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18474 /* 128-bit vectors. */
18475 V16QI_type_node =
18476 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18477 V8HI_type_node =
18478 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18479 V4SI_type_node =
18480 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18481 V4SF_type_node =
18482 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18483 V2DI_type_node =
18484 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18485
18486 /* Unsigned integer types for various mode sizes. */
18487 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18488 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18489 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18490 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18491
18492 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18493 "__builtin_neon_uqi");
18494 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18495 "__builtin_neon_uhi");
18496 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18497 "__builtin_neon_usi");
18498 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18499 "__builtin_neon_udi");
18500
18501 /* Opaque integer types for structures of vectors. */
18502 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18503 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18504 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18505 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18506
18507 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18508 "__builtin_neon_ti");
18509 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18510 "__builtin_neon_ei");
18511 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18512 "__builtin_neon_oi");
18513 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18514 "__builtin_neon_ci");
18515 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18516 "__builtin_neon_xi");
18517
18518 /* Pointers to vector types. */
18519 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18520 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18521 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18522 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18523 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18524 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18525 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18526 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18527 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18528
18529 /* Operations which return results as pairs. */
18530 void_ftype_pv8qi_v8qi_v8qi =
18531 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18532 V8QI_type_node, NULL);
18533 void_ftype_pv4hi_v4hi_v4hi =
18534 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18535 V4HI_type_node, NULL);
18536 void_ftype_pv2si_v2si_v2si =
18537 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18538 V2SI_type_node, NULL);
18539 void_ftype_pv2sf_v2sf_v2sf =
18540 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18541 V2SF_type_node, NULL);
18542 void_ftype_pdi_di_di =
18543 build_function_type_list (void_type_node, intDI_pointer_node,
18544 neon_intDI_type_node, neon_intDI_type_node, NULL);
18545 void_ftype_pv16qi_v16qi_v16qi =
18546 build_function_type_list (void_type_node, V16QI_pointer_node,
18547 V16QI_type_node, V16QI_type_node, NULL);
18548 void_ftype_pv8hi_v8hi_v8hi =
18549 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18550 V8HI_type_node, NULL);
18551 void_ftype_pv4si_v4si_v4si =
18552 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18553 V4SI_type_node, NULL);
18554 void_ftype_pv4sf_v4sf_v4sf =
18555 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18556 V4SF_type_node, NULL);
18557 void_ftype_pv2di_v2di_v2di =
18558 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18559 V2DI_type_node, NULL);
18560
18561 dreg_types[0] = V8QI_type_node;
18562 dreg_types[1] = V4HI_type_node;
18563 dreg_types[2] = V2SI_type_node;
18564 dreg_types[3] = V2SF_type_node;
18565 dreg_types[4] = neon_intDI_type_node;
18566
18567 qreg_types[0] = V16QI_type_node;
18568 qreg_types[1] = V8HI_type_node;
18569 qreg_types[2] = V4SI_type_node;
18570 qreg_types[3] = V4SF_type_node;
18571 qreg_types[4] = V2DI_type_node;
18572
18573 for (i = 0; i < 5; i++)
18574 {
18575 int j;
18576 for (j = 0; j < 5; j++)
18577 {
18578 reinterp_ftype_dreg[i][j]
18579 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18580 reinterp_ftype_qreg[i][j]
18581 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18582 }
18583 }
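
  /* At this point reinterp_ftype_dreg[i][j] is the type of a function taking
     dreg_types[j] and returning dreg_types[i] (and likewise for the quadword
     array).  For example, reinterp_ftype_dreg[1][0] is "V4HI (V8QI)", the
     type used below for __builtin_neon_vreinterpretv4hiv8qi.  */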
18584
18585 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18586 {
18587 neon_builtin_datum *d = &neon_builtin_data[i];
18588 unsigned int j, codeidx = 0;
18589
18590 d->base_fcode = fcode;
18591
18592 for (j = 0; j < T_MAX; j++)
18593 {
18594 const char* const modenames[] = {
18595 "v8qi", "v4hi", "v2si", "v2sf", "di",
18596 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18597 };
18598 char namebuf[60];
18599 tree ftype = NULL;
18600 enum insn_code icode;
18601 int is_load = 0, is_store = 0;
18602
18603 if ((d->bits & (1 << j)) == 0)
18604 continue;
18605
18606 icode = d->codes[codeidx++];
18607
18608 switch (d->itype)
18609 {
18610 case NEON_LOAD1:
18611 case NEON_LOAD1LANE:
18612 case NEON_LOADSTRUCT:
18613 case NEON_LOADSTRUCTLANE:
18614 is_load = 1;
18615 /* Fall through. */
18616 case NEON_STORE1:
18617 case NEON_STORE1LANE:
18618 case NEON_STORESTRUCT:
18619 case NEON_STORESTRUCTLANE:
18620 if (!is_load)
18621 is_store = 1;
18622 /* Fall through. */
18623 case NEON_UNOP:
18624 case NEON_BINOP:
18625 case NEON_LOGICBINOP:
18626 case NEON_SHIFTINSERT:
18627 case NEON_TERNOP:
18628 case NEON_GETLANE:
18629 case NEON_SETLANE:
18630 case NEON_CREATE:
18631 case NEON_DUP:
18632 case NEON_DUPLANE:
18633 case NEON_SHIFTIMM:
18634 case NEON_SHIFTACC:
18635 case NEON_COMBINE:
18636 case NEON_SPLIT:
18637 case NEON_CONVERT:
18638 case NEON_FIXCONV:
18639 case NEON_LANEMUL:
18640 case NEON_LANEMULL:
18641 case NEON_LANEMULH:
18642 case NEON_LANEMAC:
18643 case NEON_SCALARMUL:
18644 case NEON_SCALARMULL:
18645 case NEON_SCALARMULH:
18646 case NEON_SCALARMAC:
18647 case NEON_SELECT:
18648 case NEON_VTBL:
18649 case NEON_VTBX:
18650 {
18651 int k;
18652 tree return_type = void_type_node, args = void_list_node;
18653
18654 /* Build a function type directly from the insn_data for this
18655 builtin. The build_function_type() function takes care of
18656 removing duplicates for us. */
18657 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18658 {
18659 tree eltype;
18660
18661 if (is_load && k == 1)
18662 {
18663 /* Neon load patterns always have the memory operand
18664 (a SImode pointer) in the operand 1 position. We
18665 want a const pointer to the element type in that
18666 position. */
18667 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18668
18669 switch (1 << j)
18670 {
18671 case T_V8QI:
18672 case T_V16QI:
18673 eltype = const_intQI_pointer_node;
18674 break;
18675
18676 case T_V4HI:
18677 case T_V8HI:
18678 eltype = const_intHI_pointer_node;
18679 break;
18680
18681 case T_V2SI:
18682 case T_V4SI:
18683 eltype = const_intSI_pointer_node;
18684 break;
18685
18686 case T_V2SF:
18687 case T_V4SF:
18688 eltype = const_float_pointer_node;
18689 break;
18690
18691 case T_DI:
18692 case T_V2DI:
18693 eltype = const_intDI_pointer_node;
18694 break;
18695
18696 default: gcc_unreachable ();
18697 }
18698 }
18699 else if (is_store && k == 0)
18700 {
18701 /* Similarly, Neon store patterns use operand 0 as
18702 the memory location to store to (a SImode pointer).
18703 Use a pointer to the element type of the store in
18704 that position. */
18705 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18706
18707 switch (1 << j)
18708 {
18709 case T_V8QI:
18710 case T_V16QI:
18711 eltype = intQI_pointer_node;
18712 break;
18713
18714 case T_V4HI:
18715 case T_V8HI:
18716 eltype = intHI_pointer_node;
18717 break;
18718
18719 case T_V2SI:
18720 case T_V4SI:
18721 eltype = intSI_pointer_node;
18722 break;
18723
18724 case T_V2SF:
18725 case T_V4SF:
18726 eltype = float_pointer_node;
18727 break;
18728
18729 case T_DI:
18730 case T_V2DI:
18731 eltype = intDI_pointer_node;
18732 break;
18733
18734 default: gcc_unreachable ();
18735 }
18736 }
18737 else
18738 {
18739 switch (insn_data[icode].operand[k].mode)
18740 {
18741 case VOIDmode: eltype = void_type_node; break;
18742 /* Scalars. */
18743 case QImode: eltype = neon_intQI_type_node; break;
18744 case HImode: eltype = neon_intHI_type_node; break;
18745 case SImode: eltype = neon_intSI_type_node; break;
18746 case SFmode: eltype = neon_float_type_node; break;
18747 case DImode: eltype = neon_intDI_type_node; break;
18748 case TImode: eltype = intTI_type_node; break;
18749 case EImode: eltype = intEI_type_node; break;
18750 case OImode: eltype = intOI_type_node; break;
18751 case CImode: eltype = intCI_type_node; break;
18752 case XImode: eltype = intXI_type_node; break;
18753 /* 64-bit vectors. */
18754 case V8QImode: eltype = V8QI_type_node; break;
18755 case V4HImode: eltype = V4HI_type_node; break;
18756 case V2SImode: eltype = V2SI_type_node; break;
18757 case V2SFmode: eltype = V2SF_type_node; break;
18758 /* 128-bit vectors. */
18759 case V16QImode: eltype = V16QI_type_node; break;
18760 case V8HImode: eltype = V8HI_type_node; break;
18761 case V4SImode: eltype = V4SI_type_node; break;
18762 case V4SFmode: eltype = V4SF_type_node; break;
18763 case V2DImode: eltype = V2DI_type_node; break;
18764 default: gcc_unreachable ();
18765 }
18766 }
18767
18768 if (k == 0 && !is_store)
18769 return_type = eltype;
18770 else
18771 args = tree_cons (NULL_TREE, eltype, args);
18772 }
18773
18774 ftype = build_function_type (return_type, args);
18775 }
18776 break;
18777
18778 case NEON_RESULTPAIR:
18779 {
18780 switch (insn_data[icode].operand[1].mode)
18781 {
18782 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18783 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18784 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18785 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18786 case DImode: ftype = void_ftype_pdi_di_di; break;
18787 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18788 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18789 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18790 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18791 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18792 default: gcc_unreachable ();
18793 }
18794 }
18795 break;
18796
18797 case NEON_REINTERP:
18798 {
18799 /* We iterate over 5 doubleword types, then 5 quadword
18800 types. */
18801 int rhs = j % 5;
18802 switch (insn_data[icode].operand[0].mode)
18803 {
18804 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18805 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18806 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18807 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18808 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18809 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18810 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18811 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18812 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18813 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18814 default: gcc_unreachable ();
18815 }
18816 }
18817 break;
18818
18819 default:
18820 gcc_unreachable ();
18821 }
18822
18823 gcc_assert (ftype != NULL);
18824
18825 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
18826
18827 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
18828 NULL_TREE);
18829 }
18830 }
18831 }
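
/* A worked example of the loop above (illustrative, assuming the vadd
   patterns carry the usual trailing SImode "magic word" operand): for the
   VAR10 (BINOP, vadd, ...) entry and its v8qi variant, the operands of
   CODE_FOR_neon_vaddv8qi give a builtin named "__builtin_neon_vaddv8qi"
   whose type is roughly

     V8QI_type_node f (V8QI_type_node, V8QI_type_node, neon_intSI_type_node)

   where the trailing integer argument is the constant that the expander
   below requires to be an immediate (NEON_ARG_CONSTANT).  */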
18832
18833 static void
18834 arm_init_fp16_builtins (void)
18835 {
18836 tree fp16_type = make_node (REAL_TYPE);
18837 TYPE_PRECISION (fp16_type) = 16;
18838 layout_type (fp16_type);
18839 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
18840 }
18841
18842 static void
18843 arm_init_builtins (void)
18844 {
18845 arm_init_tls_builtins ();
18846
18847 if (TARGET_REALLY_IWMMXT)
18848 arm_init_iwmmxt_builtins ();
18849
18850 if (TARGET_NEON)
18851 arm_init_neon_builtins ();
18852
18853 if (arm_fp16_format)
18854 arm_init_fp16_builtins ();
18855 }
18856
18857 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18858
18859 static const char *
18860 arm_invalid_parameter_type (const_tree t)
18861 {
18862 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18863 return N_("function parameters cannot have __fp16 type");
18864 return NULL;
18865 }
18866
18867 /* Implement TARGET_INVALID_RETURN_TYPE. */
18868
18869 static const char *
18870 arm_invalid_return_type (const_tree t)
18871 {
18872 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18873 return N_("functions cannot return __fp16 type");
18874 return NULL;
18875 }
18876
18877 /* Implement TARGET_PROMOTED_TYPE. */
18878
18879 static tree
18880 arm_promoted_type (const_tree t)
18881 {
18882 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18883 return float_type_node;
18884 return NULL_TREE;
18885 }
18886
18887 /* Implement TARGET_CONVERT_TO_TYPE.
18888 Specifically, this hook implements the peculiarity of the ARM
18889    half-precision floating-point C semantics that requires conversions
18890    between __fp16 and double to go through an intermediate conversion to float. */
18891
18892 static tree
18893 arm_convert_to_type (tree type, tree expr)
18894 {
18895 tree fromtype = TREE_TYPE (expr);
18896 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
18897 return NULL_TREE;
18898 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
18899 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
18900 return convert (type, convert (float_type_node, expr));
18901 return NULL_TREE;
18902 }
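
/* Illustrative example of the promotion and conversion hooks above (plain C,
   not compiled here):

     __fp16 h = 1.0;
     float  f = h + 1.0f;   // h promoted to float (arm_promoted_type)
     double d = h;          // converted as (double) (float) h (this hook)

   matching the half-precision semantics described above.  */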
18903
18904 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18905 This simply adds HFmode as a supported mode; even though we don't
18906 implement arithmetic on this type directly, it's supported by
18907 optabs conversions, much the way the double-word arithmetic is
18908 special-cased in the default hook. */
18909
18910 static bool
18911 arm_scalar_mode_supported_p (enum machine_mode mode)
18912 {
18913 if (mode == HFmode)
18914 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
18915 else
18916 return default_scalar_mode_supported_p (mode);
18917 }
18918
18919 /* Errors in the source file can cause expand_expr to return const0_rtx
18920 where we expect a vector. To avoid crashing, use one of the vector
18921 clear instructions. */
18922
18923 static rtx
18924 safe_vector_operand (rtx x, enum machine_mode mode)
18925 {
18926 if (x != const0_rtx)
18927 return x;
18928 x = gen_reg_rtx (mode);
18929
18930 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
18931 : gen_rtx_SUBREG (DImode, x, 0)));
18932 return x;
18933 }
18934
18935 /* Subroutine of arm_expand_builtin to take care of binop insns. */
18936
18937 static rtx
18938 arm_expand_binop_builtin (enum insn_code icode,
18939 tree exp, rtx target)
18940 {
18941 rtx pat;
18942 tree arg0 = CALL_EXPR_ARG (exp, 0);
18943 tree arg1 = CALL_EXPR_ARG (exp, 1);
18944 rtx op0 = expand_normal (arg0);
18945 rtx op1 = expand_normal (arg1);
18946 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18947 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18948 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18949
18950 if (VECTOR_MODE_P (mode0))
18951 op0 = safe_vector_operand (op0, mode0);
18952 if (VECTOR_MODE_P (mode1))
18953 op1 = safe_vector_operand (op1, mode1);
18954
18955 if (! target
18956 || GET_MODE (target) != tmode
18957 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18958 target = gen_reg_rtx (tmode);
18959
18960 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18961
18962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18963 op0 = copy_to_mode_reg (mode0, op0);
18964 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18965 op1 = copy_to_mode_reg (mode1, op1);
18966
18967 pat = GEN_FCN (icode) (target, op0, op1);
18968 if (! pat)
18969 return 0;
18970 emit_insn (pat);
18971 return target;
18972 }
18973
18974 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18975
18976 static rtx
18977 arm_expand_unop_builtin (enum insn_code icode,
18978 tree exp, rtx target, int do_load)
18979 {
18980 rtx pat;
18981 tree arg0 = CALL_EXPR_ARG (exp, 0);
18982 rtx op0 = expand_normal (arg0);
18983 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18984 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18985
18986 if (! target
18987 || GET_MODE (target) != tmode
18988 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18989 target = gen_reg_rtx (tmode);
18990 if (do_load)
18991 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18992 else
18993 {
18994 if (VECTOR_MODE_P (mode0))
18995 op0 = safe_vector_operand (op0, mode0);
18996
18997 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18998 op0 = copy_to_mode_reg (mode0, op0);
18999 }
19000
19001 pat = GEN_FCN (icode) (target, op0);
19002 if (! pat)
19003 return 0;
19004 emit_insn (pat);
19005 return target;
19006 }
19007
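/* Comparison function for bsearching neon_builtin_data: A is the key (only
   its base_fcode is meaningful) and B is a table entry; the entry matches
   when the sought function code lies within the num_vars codes starting at
   the entry's base_fcode.  */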
19008 static int
19009 neon_builtin_compare (const void *a, const void *b)
19010 {
19011 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
19012 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
19013 unsigned int soughtcode = key->base_fcode;
19014
19015 if (soughtcode >= memb->base_fcode
19016 && soughtcode < memb->base_fcode + memb->num_vars)
19017 return 0;
19018 else if (soughtcode < memb->base_fcode)
19019 return -1;
19020 else
19021 return 1;
19022 }
19023
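/* Given the function code FCODE of a Neon builtin, binary-search
   neon_builtin_data for the entry that owns it and return the insn code of
   the corresponding instruction variant.  If ITYPE is non-null, also return
   the builtin's classification there.  */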
19024 static enum insn_code
19025 locate_neon_builtin_icode (int fcode, neon_itype *itype)
19026 {
19027 neon_builtin_datum key, *found;
19028 int idx;
19029
19030 key.base_fcode = fcode;
19031 found = (neon_builtin_datum *)
19032 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
19033 sizeof (neon_builtin_data[0]), neon_builtin_compare);
19034 gcc_assert (found);
19035 idx = fcode - (int) found->base_fcode;
19036 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
19037
19038 if (itype)
19039 *itype = found->itype;
19040
19041 return found->codes[idx];
19042 }
19043
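/* How arm_expand_neon_args handles each argument of a builtin call: force it
   into a register of the operand's mode if it does not already satisfy the
   operand predicate, require it to satisfy a (typically immediate) predicate
   as-is, or mark the end of the argument list.  */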
19044 typedef enum {
19045 NEON_ARG_COPY_TO_REG,
19046 NEON_ARG_CONSTANT,
19047 NEON_ARG_STOP
19048 } builtin_arg;
19049
19050 #define NEON_MAX_BUILTIN_ARGS 5
19051
19052 /* Expand a Neon builtin. */
19053 static rtx
19054 arm_expand_neon_args (rtx target, int icode, int have_retval,
19055 tree exp, ...)
19056 {
19057 va_list ap;
19058 rtx pat;
19059 tree arg[NEON_MAX_BUILTIN_ARGS];
19060 rtx op[NEON_MAX_BUILTIN_ARGS];
19061 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19062 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19063 int argc = 0;
19064
19065 if (have_retval
19066 && (!target
19067 || GET_MODE (target) != tmode
19068 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19069 target = gen_reg_rtx (tmode);
19070
19071 va_start (ap, exp);
19072
19073 for (;;)
19074 {
19075 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19076
19077 if (thisarg == NEON_ARG_STOP)
19078 break;
19079 else
19080 {
19081 arg[argc] = CALL_EXPR_ARG (exp, argc);
19082 op[argc] = expand_normal (arg[argc]);
19083 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
19084
19085 switch (thisarg)
19086 {
19087 case NEON_ARG_COPY_TO_REG:
19088 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19089 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19090 (op[argc], mode[argc]))
19091 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19092 break;
19093
19094 case NEON_ARG_CONSTANT:
19095 /* FIXME: This error message is somewhat unhelpful. */
19096 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19097 (op[argc], mode[argc]))
19098 error ("argument must be a constant");
19099 break;
19100
19101 case NEON_ARG_STOP:
19102 gcc_unreachable ();
19103 }
19104
19105 argc++;
19106 }
19107 }
19108
19109 va_end (ap);
19110
19111 if (have_retval)
19112 switch (argc)
19113 {
19114 case 1:
19115 pat = GEN_FCN (icode) (target, op[0]);
19116 break;
19117
19118 case 2:
19119 pat = GEN_FCN (icode) (target, op[0], op[1]);
19120 break;
19121
19122 case 3:
19123 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19124 break;
19125
19126 case 4:
19127 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19128 break;
19129
19130 case 5:
19131 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19132 break;
19133
19134 default:
19135 gcc_unreachable ();
19136 }
19137 else
19138 switch (argc)
19139 {
19140 case 1:
19141 pat = GEN_FCN (icode) (op[0]);
19142 break;
19143
19144 case 2:
19145 pat = GEN_FCN (icode) (op[0], op[1]);
19146 break;
19147
19148 case 3:
19149 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19150 break;
19151
19152 case 4:
19153 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19154 break;
19155
19156 case 5:
19157 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19158 break;
19159
19160 default:
19161 gcc_unreachable ();
19162 }
19163
19164 if (!pat)
19165 return 0;
19166
19167 emit_insn (pat);
19168
19169 return target;
19170 }
19171
19172 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19173 constants defined per-instruction or per instruction-variant. Instead, the
19174 required info is looked up in the table neon_builtin_data. */
19175 static rtx
19176 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19177 {
19178 neon_itype itype;
19179 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19180
19181 switch (itype)
19182 {
19183 case NEON_UNOP:
19184 case NEON_CONVERT:
19185 case NEON_DUPLANE:
19186 return arm_expand_neon_args (target, icode, 1, exp,
19187 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19188
19189 case NEON_BINOP:
19190 case NEON_SETLANE:
19191 case NEON_SCALARMUL:
19192 case NEON_SCALARMULL:
19193 case NEON_SCALARMULH:
19194 case NEON_SHIFTINSERT:
19195 case NEON_LOGICBINOP:
19196 return arm_expand_neon_args (target, icode, 1, exp,
19197 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19198 NEON_ARG_STOP);
19199
19200 case NEON_TERNOP:
19201 return arm_expand_neon_args (target, icode, 1, exp,
19202 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19203 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19204
19205 case NEON_GETLANE:
19206 case NEON_FIXCONV:
19207 case NEON_SHIFTIMM:
19208 return arm_expand_neon_args (target, icode, 1, exp,
19209 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19210 NEON_ARG_STOP);
19211
19212 case NEON_CREATE:
19213 return arm_expand_neon_args (target, icode, 1, exp,
19214 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19215
19216 case NEON_DUP:
19217 case NEON_SPLIT:
19218 case NEON_REINTERP:
19219 return arm_expand_neon_args (target, icode, 1, exp,
19220 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19221
19222 case NEON_COMBINE:
19223 case NEON_VTBL:
19224 return arm_expand_neon_args (target, icode, 1, exp,
19225 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19226
19227 case NEON_RESULTPAIR:
19228 return arm_expand_neon_args (target, icode, 0, exp,
19229 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19230 NEON_ARG_STOP);
19231
19232 case NEON_LANEMUL:
19233 case NEON_LANEMULL:
19234 case NEON_LANEMULH:
19235 return arm_expand_neon_args (target, icode, 1, exp,
19236 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19237 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19238
19239 case NEON_LANEMAC:
19240 return arm_expand_neon_args (target, icode, 1, exp,
19241 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19242 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19243
19244 case NEON_SHIFTACC:
19245 return arm_expand_neon_args (target, icode, 1, exp,
19246 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19247 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19248
19249 case NEON_SCALARMAC:
19250 return arm_expand_neon_args (target, icode, 1, exp,
19251 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19252 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19253
19254 case NEON_SELECT:
19255 case NEON_VTBX:
19256 return arm_expand_neon_args (target, icode, 1, exp,
19257 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19258 NEON_ARG_STOP);
19259
19260 case NEON_LOAD1:
19261 case NEON_LOADSTRUCT:
19262 return arm_expand_neon_args (target, icode, 1, exp,
19263 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19264
19265 case NEON_LOAD1LANE:
19266 case NEON_LOADSTRUCTLANE:
19267 return arm_expand_neon_args (target, icode, 1, exp,
19268 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19269 NEON_ARG_STOP);
19270
19271 case NEON_STORE1:
19272 case NEON_STORESTRUCT:
19273 return arm_expand_neon_args (target, icode, 0, exp,
19274 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19275
19276 case NEON_STORE1LANE:
19277 case NEON_STORESTRUCTLANE:
19278 return arm_expand_neon_args (target, icode, 0, exp,
19279 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19280 NEON_ARG_STOP);
19281 }
19282
19283 gcc_unreachable ();
19284 }
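
/* Worked example (illustrative): a call to __builtin_neon_vld1v8qi (addr)
   arrives here with itype NEON_LOAD1 and is expanded with a single
   NEON_ARG_COPY_TO_REG for the address operand, while a two-input builtin
   such as __builtin_neon_vaddv8qi instead takes two register operands plus
   the trailing NEON_ARG_CONSTANT "magic word".  */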
19285
19286 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19287 void
19288 neon_reinterpret (rtx dest, rtx src)
19289 {
19290 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19291 }
19292
19293 /* Emit code to place a Neon pair result in memory locations (with equal
19294 registers). */
19295 void
19296 neon_emit_pair_result_insn (enum machine_mode mode,
19297 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19298 rtx op1, rtx op2)
19299 {
19300 rtx mem = gen_rtx_MEM (mode, destaddr);
19301 rtx tmp1 = gen_reg_rtx (mode);
19302 rtx tmp2 = gen_reg_rtx (mode);
19303
19304 emit_insn (intfn (tmp1, op1, tmp2, op2));
19305
19306 emit_move_insn (mem, tmp1);
19307 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19308 emit_move_insn (mem, tmp2);
19309 }
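
/* For instance (illustrative), with MODE == V8QImode the function above
   computes the two halves into fresh registers and stores them at
   [DESTADDR] and [DESTADDR + 8] respectively.  */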
19310
19311 /* Set up operands for a register copy from src to dest, taking care not to
19312 clobber registers in the process.
19313 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19314 be called with a large N, so that should be OK. */
19315
19316 void
19317 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19318 {
19319 unsigned int copied = 0, opctr = 0;
19320 unsigned int done = (1 << count) - 1;
19321 unsigned int i, j;
19322
19323 while (copied != done)
19324 {
19325 for (i = 0; i < count; i++)
19326 {
19327 int good = 1;
19328
19329 for (j = 0; good && j < count; j++)
19330 if (i != j && (copied & (1 << j)) == 0
19331 && reg_overlap_mentioned_p (src[j], dest[i]))
19332 good = 0;
19333
19334 if (good)
19335 {
19336 operands[opctr++] = dest[i];
19337 operands[opctr++] = src[i];
19338 copied |= 1 << i;
19339 }
19340 }
19341 }
19342
19343 gcc_assert (opctr == count * 2);
19344 }
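
/* Example (illustrative): with COUNT == 2, dest = {d0, d1} and
   src = {d1, d2}, the pass over the destinations emits the d0 <- d1 copy
   first (d1 is still needed as a source) and d1 <- d2 second, so no source
   register is clobbered before it has been read.  */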
19345
19346 /* Expand an expression EXP that calls a built-in function,
19347 with result going to TARGET if that's convenient
19348 (and in mode MODE if that's convenient).
19349 SUBTARGET may be used as the target for computing one of EXP's operands.
19350 IGNORE is nonzero if the value is to be ignored. */
19351
19352 static rtx
19353 arm_expand_builtin (tree exp,
19354 rtx target,
19355 rtx subtarget ATTRIBUTE_UNUSED,
19356 enum machine_mode mode ATTRIBUTE_UNUSED,
19357 int ignore ATTRIBUTE_UNUSED)
19358 {
19359 const struct builtin_description * d;
19360 enum insn_code icode;
19361 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19362 tree arg0;
19363 tree arg1;
19364 tree arg2;
19365 rtx op0;
19366 rtx op1;
19367 rtx op2;
19368 rtx pat;
19369 int fcode = DECL_FUNCTION_CODE (fndecl);
19370 size_t i;
19371 enum machine_mode tmode;
19372 enum machine_mode mode0;
19373 enum machine_mode mode1;
19374 enum machine_mode mode2;
19375
19376 if (fcode >= ARM_BUILTIN_NEON_BASE)
19377 return arm_expand_neon_builtin (fcode, exp, target);
19378
19379 switch (fcode)
19380 {
19381 case ARM_BUILTIN_TEXTRMSB:
19382 case ARM_BUILTIN_TEXTRMUB:
19383 case ARM_BUILTIN_TEXTRMSH:
19384 case ARM_BUILTIN_TEXTRMUH:
19385 case ARM_BUILTIN_TEXTRMSW:
19386 case ARM_BUILTIN_TEXTRMUW:
19387 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19388 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19389 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19390 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19391 : CODE_FOR_iwmmxt_textrmw);
19392
19393 arg0 = CALL_EXPR_ARG (exp, 0);
19394 arg1 = CALL_EXPR_ARG (exp, 1);
19395 op0 = expand_normal (arg0);
19396 op1 = expand_normal (arg1);
19397 tmode = insn_data[icode].operand[0].mode;
19398 mode0 = insn_data[icode].operand[1].mode;
19399 mode1 = insn_data[icode].operand[2].mode;
19400
19401 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19402 op0 = copy_to_mode_reg (mode0, op0);
19403 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19404 {
19405 /* @@@ better error message */
19406 error ("selector must be an immediate");
19407 return gen_reg_rtx (tmode);
19408 }
19409 if (target == 0
19410 || GET_MODE (target) != tmode
19411 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19412 target = gen_reg_rtx (tmode);
19413 pat = GEN_FCN (icode) (target, op0, op1);
19414 if (! pat)
19415 return 0;
19416 emit_insn (pat);
19417 return target;
19418
19419 case ARM_BUILTIN_TINSRB:
19420 case ARM_BUILTIN_TINSRH:
19421 case ARM_BUILTIN_TINSRW:
19422 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19423 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19424 : CODE_FOR_iwmmxt_tinsrw);
19425 arg0 = CALL_EXPR_ARG (exp, 0);
19426 arg1 = CALL_EXPR_ARG (exp, 1);
19427 arg2 = CALL_EXPR_ARG (exp, 2);
19428 op0 = expand_normal (arg0);
19429 op1 = expand_normal (arg1);
19430 op2 = expand_normal (arg2);
19431 tmode = insn_data[icode].operand[0].mode;
19432 mode0 = insn_data[icode].operand[1].mode;
19433 mode1 = insn_data[icode].operand[2].mode;
19434 mode2 = insn_data[icode].operand[3].mode;
19435
19436 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19437 op0 = copy_to_mode_reg (mode0, op0);
19438 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19439 op1 = copy_to_mode_reg (mode1, op1);
19440 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19441 {
19442 /* @@@ better error message */
19443 error ("selector must be an immediate");
19444 return const0_rtx;
19445 }
19446 if (target == 0
19447 || GET_MODE (target) != tmode
19448 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19449 target = gen_reg_rtx (tmode);
19450 pat = GEN_FCN (icode) (target, op0, op1, op2);
19451 if (! pat)
19452 return 0;
19453 emit_insn (pat);
19454 return target;
19455
19456 case ARM_BUILTIN_SETWCX:
19457 arg0 = CALL_EXPR_ARG (exp, 0);
19458 arg1 = CALL_EXPR_ARG (exp, 1);
19459 op0 = force_reg (SImode, expand_normal (arg0));
19460 op1 = expand_normal (arg1);
19461 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19462 return 0;
19463
19464 case ARM_BUILTIN_GETWCX:
19465 arg0 = CALL_EXPR_ARG (exp, 0);
19466 op0 = expand_normal (arg0);
19467 target = gen_reg_rtx (SImode);
19468 emit_insn (gen_iwmmxt_tmrc (target, op0));
19469 return target;
19470
19471 case ARM_BUILTIN_WSHUFH:
19472 icode = CODE_FOR_iwmmxt_wshufh;
19473 arg0 = CALL_EXPR_ARG (exp, 0);
19474 arg1 = CALL_EXPR_ARG (exp, 1);
19475 op0 = expand_normal (arg0);
19476 op1 = expand_normal (arg1);
19477 tmode = insn_data[icode].operand[0].mode;
19478 mode1 = insn_data[icode].operand[1].mode;
19479 mode2 = insn_data[icode].operand[2].mode;
19480
19481 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19482 op0 = copy_to_mode_reg (mode1, op0);
19483 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19484 {
19485 /* @@@ better error message */
19486 error ("mask must be an immediate");
19487 return const0_rtx;
19488 }
19489 if (target == 0
19490 || GET_MODE (target) != tmode
19491 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19492 target = gen_reg_rtx (tmode);
19493 pat = GEN_FCN (icode) (target, op0, op1);
19494 if (! pat)
19495 return 0;
19496 emit_insn (pat);
19497 return target;
19498
19499 case ARM_BUILTIN_WSADB:
19500 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19501 case ARM_BUILTIN_WSADH:
19502 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19503 case ARM_BUILTIN_WSADBZ:
19504 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19505 case ARM_BUILTIN_WSADHZ:
19506 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19507
19508 /* Several three-argument builtins. */
19509 case ARM_BUILTIN_WMACS:
19510 case ARM_BUILTIN_WMACU:
19511 case ARM_BUILTIN_WALIGN:
19512 case ARM_BUILTIN_TMIA:
19513 case ARM_BUILTIN_TMIAPH:
19514 case ARM_BUILTIN_TMIATT:
19515 case ARM_BUILTIN_TMIATB:
19516 case ARM_BUILTIN_TMIABT:
19517 case ARM_BUILTIN_TMIABB:
19518 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19519 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19520 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19521 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19522 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19523 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19524 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19525 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19526 : CODE_FOR_iwmmxt_walign);
19527 arg0 = CALL_EXPR_ARG (exp, 0);
19528 arg1 = CALL_EXPR_ARG (exp, 1);
19529 arg2 = CALL_EXPR_ARG (exp, 2);
19530 op0 = expand_normal (arg0);
19531 op1 = expand_normal (arg1);
19532 op2 = expand_normal (arg2);
19533 tmode = insn_data[icode].operand[0].mode;
19534 mode0 = insn_data[icode].operand[1].mode;
19535 mode1 = insn_data[icode].operand[2].mode;
19536 mode2 = insn_data[icode].operand[3].mode;
19537
19538 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19539 op0 = copy_to_mode_reg (mode0, op0);
19540 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19541 op1 = copy_to_mode_reg (mode1, op1);
19542 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19543 op2 = copy_to_mode_reg (mode2, op2);
19544 if (target == 0
19545 || GET_MODE (target) != tmode
19546 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19547 target = gen_reg_rtx (tmode);
19548 pat = GEN_FCN (icode) (target, op0, op1, op2);
19549 if (! pat)
19550 return 0;
19551 emit_insn (pat);
19552 return target;
19553
19554 case ARM_BUILTIN_WZERO:
19555 target = gen_reg_rtx (DImode);
19556 emit_insn (gen_iwmmxt_clrdi (target));
19557 return target;
19558
19559 case ARM_BUILTIN_THREAD_POINTER:
19560 return arm_load_tp (target);
19561
19562 default:
19563 break;
19564 }
19565
19566 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19567 if (d->code == (const enum arm_builtins) fcode)
19568 return arm_expand_binop_builtin (d->icode, exp, target);
19569
19570 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19571 if (d->code == (const enum arm_builtins) fcode)
19572 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19573
19574 /* @@@ Should really do something sensible here. */
19575 return NULL_RTX;
19576 }
19577 \f
19578 /* Return the number (counting from 0) of
19579 the least significant set bit in MASK. */
19580
19581 inline static int
19582 number_of_first_bit_set (unsigned mask)
19583 {
19584 int bit;
19585
19586 for (bit = 0;
19587 (mask & (1 << bit)) == 0;
19588 ++bit)
19589 continue;
19590
19591 return bit;
19592 }
19593
19594 /* Emit code to push or pop registers to or from the stack. F is the
19595 assembly file. MASK is the registers to push or pop. PUSH is
19596 nonzero if we should push, and zero if we should pop. For debugging
19597 output, if pushing, adjust CFA_OFFSET by the amount of space added
19598 to the stack. REAL_REGS should have the same number of bits set as
19599 MASK, and will be used instead (in the same order) to describe which
19600 registers were saved - this is used to mark the save slots when we
19601 push high registers after moving them to low registers. */
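/* For instance (illustrative, assuming the usual ARM numbering where LR is
   register 14): MASK == REAL_REGS == 0x4030 with PUSH nonzero emits

	.save	{r4, r5, lr}
	push	{r4, r5, lr}

   (the .save directive only when target unwind info is in use) and, when
   DWARF frame info is being emitted, advances *CFA_OFFSET by 12 for the
   three words pushed.  */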
19602 static void
19603 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19604 unsigned long real_regs)
19605 {
19606 int regno;
19607 int lo_mask = mask & 0xFF;
19608 int pushed_words = 0;
19609
19610 gcc_assert (mask);
19611
19612 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19613 {
19614       /* Special case.  Do not generate a POP PC statement here; do it in
19615 	 thumb_exit ().  */
19616 thumb_exit (f, -1);
19617 return;
19618 }
19619
19620 if (push && arm_except_unwind_info () == UI_TARGET)
19621 {
19622 fprintf (f, "\t.save\t{");
19623 for (regno = 0; regno < 15; regno++)
19624 {
19625 if (real_regs & (1 << regno))
19626 {
19627 if (real_regs & ((1 << regno) -1))
19628 fprintf (f, ", ");
19629 asm_fprintf (f, "%r", regno);
19630 }
19631 }
19632 fprintf (f, "}\n");
19633 }
19634
19635 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19636
19637 /* Look at the low registers first. */
19638 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19639 {
19640 if (lo_mask & 1)
19641 {
19642 asm_fprintf (f, "%r", regno);
19643
19644 if ((lo_mask & ~1) != 0)
19645 fprintf (f, ", ");
19646
19647 pushed_words++;
19648 }
19649 }
19650
19651 if (push && (mask & (1 << LR_REGNUM)))
19652 {
19653 /* Catch pushing the LR. */
19654 if (mask & 0xFF)
19655 fprintf (f, ", ");
19656
19657 asm_fprintf (f, "%r", LR_REGNUM);
19658
19659 pushed_words++;
19660 }
19661 else if (!push && (mask & (1 << PC_REGNUM)))
19662 {
19663 /* Catch popping the PC. */
19664 if (TARGET_INTERWORK || TARGET_BACKTRACE
19665 || crtl->calls_eh_return)
19666 {
19667 	      /* The PC is never popped directly; instead
19668 it is popped into r3 and then BX is used. */
19669 fprintf (f, "}\n");
19670
19671 thumb_exit (f, -1);
19672
19673 return;
19674 }
19675 else
19676 {
19677 if (mask & 0xFF)
19678 fprintf (f, ", ");
19679
19680 asm_fprintf (f, "%r", PC_REGNUM);
19681 }
19682 }
19683
19684 fprintf (f, "}\n");
19685
19686 if (push && pushed_words && dwarf2out_do_frame ())
19687 {
19688 char *l = dwarf2out_cfi_label (false);
19689 int pushed_mask = real_regs;
19690
19691 *cfa_offset += pushed_words * 4;
19692 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19693
19694 pushed_words = 0;
19695 pushed_mask = real_regs;
19696 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19697 {
19698 if (pushed_mask & 1)
19699 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19700 }
19701 }
19702 }
19703
19704 /* Generate code to return from a thumb function.
19705 If 'reg_containing_return_addr' is -1, then the return address is
19706 actually on the stack, at the stack pointer. */
19707 static void
19708 thumb_exit (FILE *f, int reg_containing_return_addr)
19709 {
19710 unsigned regs_available_for_popping;
19711 unsigned regs_to_pop;
19712 int pops_needed;
19713 unsigned available;
19714 unsigned required;
19715 int mode;
19716 int size;
19717 int restore_a4 = FALSE;
19718
19719 /* Compute the registers we need to pop. */
19720 regs_to_pop = 0;
19721 pops_needed = 0;
19722
19723 if (reg_containing_return_addr == -1)
19724 {
19725 regs_to_pop |= 1 << LR_REGNUM;
19726 ++pops_needed;
19727 }
19728
19729 if (TARGET_BACKTRACE)
19730 {
19731 /* Restore the (ARM) frame pointer and stack pointer. */
19732 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19733 pops_needed += 2;
19734 }
19735
19736 /* If there is nothing to pop then just emit the BX instruction and
19737 return. */
19738 if (pops_needed == 0)
19739 {
19740 if (crtl->calls_eh_return)
19741 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19742
19743 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19744 return;
19745 }
19746   /* Otherwise, if we are not supporting interworking, have not created
19747      a backtrace structure, and the function was not entered in ARM mode,
19748      just pop the return address straight into the PC. */
19749 else if (!TARGET_INTERWORK
19750 && !TARGET_BACKTRACE
19751 && !is_called_in_ARM_mode (current_function_decl)
19752 && !crtl->calls_eh_return)
19753 {
19754 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19755 return;
19756 }
19757
19758 /* Find out how many of the (return) argument registers we can corrupt. */
19759 regs_available_for_popping = 0;
19760
19761 /* If returning via __builtin_eh_return, the bottom three registers
19762 all contain information needed for the return. */
19763 if (crtl->calls_eh_return)
19764 size = 12;
19765 else
19766 {
19767       /* Try to deduce the registers used from the function's
19768 	 return value.  This is more reliable than examining
19769 df_regs_ever_live_p () because that will be set if the register is
19770 ever used in the function, not just if the register is used
19771 to hold a return value. */
19772
19773 if (crtl->return_rtx != 0)
19774 mode = GET_MODE (crtl->return_rtx);
19775 else
19776 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19777
19778 size = GET_MODE_SIZE (mode);
19779
19780 if (size == 0)
19781 {
19782 /* In a void function we can use any argument register.
19783 In a function that returns a structure on the stack
19784 we can use the second and third argument registers. */
19785 if (mode == VOIDmode)
19786 regs_available_for_popping =
19787 (1 << ARG_REGISTER (1))
19788 | (1 << ARG_REGISTER (2))
19789 | (1 << ARG_REGISTER (3));
19790 else
19791 regs_available_for_popping =
19792 (1 << ARG_REGISTER (2))
19793 | (1 << ARG_REGISTER (3));
19794 }
19795 else if (size <= 4)
19796 regs_available_for_popping =
19797 (1 << ARG_REGISTER (2))
19798 | (1 << ARG_REGISTER (3));
19799 else if (size <= 8)
19800 regs_available_for_popping =
19801 (1 << ARG_REGISTER (3));
19802 }
19803
19804 /* Match registers to be popped with registers into which we pop them. */
19805 for (available = regs_available_for_popping,
19806 required = regs_to_pop;
19807 required != 0 && available != 0;
19808 available &= ~(available & - available),
19809 required &= ~(required & - required))
19810 -- pops_needed;
19811
19812 /* If we have any popping registers left over, remove them. */
19813 if (available > 0)
19814 regs_available_for_popping &= ~available;
19815
19816 /* Otherwise if we need another popping register we can use
19817 the fourth argument register. */
19818 else if (pops_needed)
19819 {
19820 /* If we have not found any free argument registers and
19821 reg a4 contains the return address, we must move it. */
19822 if (regs_available_for_popping == 0
19823 && reg_containing_return_addr == LAST_ARG_REGNUM)
19824 {
19825 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19826 reg_containing_return_addr = LR_REGNUM;
19827 }
19828 else if (size > 12)
19829 {
19830 /* Register a4 is being used to hold part of the return value,
19831 but we have dire need of a free, low register. */
19832 restore_a4 = TRUE;
19833
19834 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
19835 }
19836
19837 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19838 {
19839 /* The fourth argument register is available. */
19840 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19841
19842 --pops_needed;
19843 }
19844 }
19845
19846 /* Pop as many registers as we can. */
19847 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19848 regs_available_for_popping);
19849
19850 /* Process the registers we popped. */
19851 if (reg_containing_return_addr == -1)
19852 {
19853 /* The return address was popped into the lowest numbered register. */
19854 regs_to_pop &= ~(1 << LR_REGNUM);
19855
19856 reg_containing_return_addr =
19857 number_of_first_bit_set (regs_available_for_popping);
19858
19859 /* Remove this register from the mask of available registers, so that
19860 the return address will not be corrupted by further pops. */
19861 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19862 }
19863
19864 /* If we popped other registers then handle them here. */
19865 if (regs_available_for_popping)
19866 {
19867 int frame_pointer;
19868
19869 /* Work out which register currently contains the frame pointer. */
19870 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19871
19872 /* Move it into the correct place. */
19873 asm_fprintf (f, "\tmov\t%r, %r\n",
19874 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19875
19876 /* (Temporarily) remove it from the mask of popped registers. */
19877 regs_available_for_popping &= ~(1 << frame_pointer);
19878 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
19879
19880 if (regs_available_for_popping)
19881 {
19882 int stack_pointer;
19883
19884 /* We popped the stack pointer as well,
19885 find the register that contains it. */
19886 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19887
19888 /* Move it into the stack register. */
19889 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19890
19891 /* At this point we have popped all necessary registers, so
19892 do not worry about restoring regs_available_for_popping
19893 to its correct value:
19894
19895 assert (pops_needed == 0)
19896 assert (regs_available_for_popping == (1 << frame_pointer))
19897 assert (regs_to_pop == (1 << STACK_POINTER)) */
19898 }
19899 else
19900 {
19901 /* Since we have just moved the popped value into the frame
19902 pointer, the popping register is available for reuse, and
19903 we know that we still have the stack pointer left to pop. */
19904 regs_available_for_popping |= (1 << frame_pointer);
19905 }
19906 }
19907
19908 /* If we still have registers left on the stack, but we no longer have
19909 any registers into which we can pop them, then we must move the return
19910 address into the link register and make available the register that
19911 contained it. */
19912 if (regs_available_for_popping == 0 && pops_needed > 0)
19913 {
19914 regs_available_for_popping |= 1 << reg_containing_return_addr;
19915
19916 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19917 reg_containing_return_addr);
19918
19919 reg_containing_return_addr = LR_REGNUM;
19920 }
19921
19922 /* If we have registers left on the stack then pop some more.
19923 We know that at most we will want to pop FP and SP. */
19924 if (pops_needed > 0)
19925 {
19926 int popped_into;
19927 int move_to;
19928
19929 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19930 regs_available_for_popping);
19931
19932 /* We have popped either FP or SP.
19933 Move whichever one it is into the correct register. */
19934 popped_into = number_of_first_bit_set (regs_available_for_popping);
19935 move_to = number_of_first_bit_set (regs_to_pop);
19936
19937 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19938
19939 regs_to_pop &= ~(1 << move_to);
19940
19941 --pops_needed;
19942 }
19943
19944 /* If we still have not popped everything then we must have only
19945 had one register available to us and we are now popping the SP. */
19946 if (pops_needed > 0)
19947 {
19948 int popped_into;
19949
19950 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19951 regs_available_for_popping);
19952
19953 popped_into = number_of_first_bit_set (regs_available_for_popping);
19954
19955 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19956 /*
19957 assert (regs_to_pop == (1 << STACK_POINTER))
19958 assert (pops_needed == 1)
19959 */
19960 }
19961
19962 /* If necessary restore the a4 register. */
19963 if (restore_a4)
19964 {
19965 if (reg_containing_return_addr != LR_REGNUM)
19966 {
19967 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19968 reg_containing_return_addr = LR_REGNUM;
19969 }
19970
19971 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19972 }
19973
19974 if (crtl->calls_eh_return)
19975 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19976
19977 /* Return to caller. */
19978 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19979 }
19980 \f
19981 /* Scan INSN just before assembler is output for it.
19982 For Thumb-1, we track the status of the condition codes; this
19983 information is used in the cbranchsi4_insn pattern. */
19984 void
19985 thumb1_final_prescan_insn (rtx insn)
19986 {
19987 if (flag_print_asm_name)
19988 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19989 INSN_ADDRESSES (INSN_UID (insn)));
19990 /* Don't overwrite the previous setter when we get to a cbranch. */
19991 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19992 {
19993 enum attr_conds conds;
19994
19995 if (cfun->machine->thumb1_cc_insn)
19996 {
19997 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
19998 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
19999 CC_STATUS_INIT;
20000 }
20001 conds = get_attr_conds (insn);
20002 if (conds == CONDS_SET)
20003 {
20004 rtx set = single_set (insn);
20005 cfun->machine->thumb1_cc_insn = insn;
20006 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20007 cfun->machine->thumb1_cc_op1 = const0_rtx;
20008 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20009 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20010 {
20011 rtx src1 = XEXP (SET_SRC (set), 1);
20012 if (src1 == const0_rtx)
20013 cfun->machine->thumb1_cc_mode = CCmode;
20014 }
20015 }
20016 else if (conds != CONDS_NOCOND)
20017 cfun->machine->thumb1_cc_insn = NULL_RTX;
20018 }
20019 }
20020
20021 int
20022 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20023 {
20024 unsigned HOST_WIDE_INT mask = 0xff;
20025 int i;
20026
20027 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20028 if (val == 0) /* XXX */
20029 return 0;
20030
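/* The constant is usable if every set bit of VAL lies within some 8-bit
   window shifted left by 0..24 bits, i.e. the value can be formed from a
   byte constant and a left shift.  */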
20031 for (i = 0; i < 25; i++)
20032 if ((val & (mask << i)) == val)
20033 return 1;
20034
20035 return 0;
20036 }
20037
20038 /* Returns nonzero if the current function contains,
20039 or might contain, a far jump. */
20040 static int
20041 thumb_far_jump_used_p (void)
20042 {
20043 rtx insn;
20044
20045 /* This test is only important for leaf functions. */
20046 /* assert (!leaf_function_p ()); */
20047
20048 /* If we have already decided that far jumps may be used,
20049 do not bother checking again, and always return true even if
20050 it turns out that they are not being used. Once we have made
20051 the decision that far jumps are present (and that hence the link
20052 register will be pushed onto the stack) we cannot go back on it. */
20053 if (cfun->machine->far_jump_used)
20054 return 1;
20055
20056 /* If this function is not being called from the prologue/epilogue
20057 generation code then it must be being called from the
20058 INITIAL_ELIMINATION_OFFSET macro. */
20059 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20060 {
20061 /* In this case we know that we are being asked about the elimination
20062 of the arg pointer register. If that register is not being used,
20063 then there are no arguments on the stack, and we do not have to
20064 worry that a far jump might force the prologue to push the link
20065 register, changing the stack offsets. In this case we can just
20066 return false, since the presence of far jumps in the function will
20067 not affect stack offsets.
20068
20069 If the arg pointer is live (or if it was live, but has now been
20070 eliminated and so set to dead) then we do have to test to see if
20071 the function might contain a far jump. This test can lead to some
20072 false positives, since before reload is completed, the length of
20073 branch instructions is not known, so gcc defaults to returning their
20074 longest length, which in turn sets the far jump attribute to true.
20075
20076 A false positive will not result in bad code being generated, but it
20077 will result in a needless push and pop of the link register. We
20078 hope that this does not occur too often.
20079
20080 If we need doubleword stack alignment this could affect the other
20081 elimination offsets so we can't risk getting it wrong. */
20082 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20083 cfun->machine->arg_pointer_live = 1;
20084 else if (!cfun->machine->arg_pointer_live)
20085 return 0;
20086 }
20087
20088 /* Check to see if the function contains a branch
20089 insn with the far jump attribute set. */
20090 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20091 {
20092 if (GET_CODE (insn) == JUMP_INSN
20093 /* Ignore tablejump patterns. */
20094 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20095 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20096 && get_attr_far_jump (insn) == FAR_JUMP_YES
20097 )
20098 {
20099 /* Record the fact that we have decided that
20100 the function does use far jumps. */
20101 cfun->machine->far_jump_used = 1;
20102 return 1;
20103 }
20104 }
20105
20106 return 0;
20107 }
20108
20109 /* Return nonzero if FUNC must be entered in ARM mode. */
20110 int
20111 is_called_in_ARM_mode (tree func)
20112 {
20113 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20114
20115 /* Ignore the problem about functions whose address is taken. */
20116 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20117 return TRUE;
20118
20119 #ifdef ARM_PE
20120 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20121 #else
20122 return FALSE;
20123 #endif
20124 }
20125
20126 /* Given the stack offsets and register mask in OFFSETS, decide how
20127 many additional registers to push instead of subtracting a constant
20128 from SP. For epilogues the principle is the same except we use pop.
20129 FOR_PROLOGUE indicates which we're generating. */
20130 static int
20131 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20132 {
20133 HOST_WIDE_INT amount;
20134 unsigned long live_regs_mask = offsets->saved_regs_mask;
20135 /* Extract a mask of the ones we can give to the Thumb's push/pop
20136 instruction. */
20137 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20138 /* Then count how many other high registers will need to be pushed. */
20139 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20140 int n_free, reg_base;
20141
20142 if (!for_prologue && frame_pointer_needed)
20143 amount = offsets->locals_base - offsets->saved_regs;
20144 else
20145 amount = offsets->outgoing_args - offsets->saved_regs;
20146
20147 /* If the stack frame size is 512 exactly, we can save one load
20148 instruction, which should make this a win even when optimizing
20149 for speed. */
20150 if (!optimize_size && amount != 512)
20151 return 0;
20152
20153 /* Can't do this if there are high registers to push. */
20154 if (high_regs_pushed != 0)
20155 return 0;
20156
20157 /* Shouldn't do it in the prologue if no registers would normally
20158 be pushed at all. In the epilogue, also allow it if we'll have
20159 a pop insn for the PC. */
20160 if (l_mask == 0
20161 && (for_prologue
20162 || TARGET_BACKTRACE
20163 || (live_regs_mask & 1 << LR_REGNUM) == 0
20164 || TARGET_INTERWORK
20165 || crtl->args.pretend_args_size != 0))
20166 return 0;
20167
20168 /* Don't do this if thumb_expand_prologue wants to emit instructions
20169 between the push and the stack frame allocation. */
20170 if (for_prologue
20171 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20172 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20173 return 0;
20174
20175 reg_base = 0;
20176 n_free = 0;
20177 if (!for_prologue)
20178 {
20179 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20180 live_regs_mask >>= reg_base;
20181 }
20182
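/* Count how many consecutive low registers, starting at REG_BASE (just
   above any registers needed for the return value when popping), are not
   in the save mask and, for an epilogue, are call-clobbered; these can be
   used as dummy push/pop slots to absorb part of the stack adjustment.  */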
20183 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20184 && (for_prologue || call_used_regs[reg_base + n_free]))
20185 {
20186 live_regs_mask >>= 1;
20187 n_free++;
20188 }
20189
20190 if (n_free == 0)
20191 return 0;
20192 gcc_assert (amount / 4 * 4 == amount);
20193
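/* Each dummy register pushed or popped absorbs 4 bytes of the stack
   adjustment.  Push just enough to bring the remaining adjustment below
   512 (a single Thumb ADD/SUB of SP can encode at most 508), or, if the
   free registers can absorb the whole adjustment, use them for all of it.  */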
20194 if (amount >= 512 && (amount - n_free * 4) < 512)
20195 return (amount - 508) / 4;
20196 if (amount <= n_free * 4)
20197 return amount / 4;
20198 return 0;
20199 }
20200
20201 /* The bits which aren't usefully expanded as rtl. */
20202 const char *
20203 thumb_unexpanded_epilogue (void)
20204 {
20205 arm_stack_offsets *offsets;
20206 int regno;
20207 unsigned long live_regs_mask = 0;
20208 int high_regs_pushed = 0;
20209 int extra_pop;
20210 int had_to_push_lr;
20211 int size;
20212
20213 if (cfun->machine->return_used_this_function != 0)
20214 return "";
20215
20216 if (IS_NAKED (arm_current_func_type ()))
20217 return "";
20218
20219 offsets = arm_get_frame_offsets ();
20220 live_regs_mask = offsets->saved_regs_mask;
20221 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20222
20223 /* We can deduce the registers used from the function's return value.
20224 This is more reliable than examining df_regs_ever_live_p () because that
20225 will be set if the register is ever used in the function, not just if
20226 the register is used to hold a return value. */
20227 size = arm_size_return_regs ();
20228
20229 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20230 if (extra_pop > 0)
20231 {
20232 unsigned long extra_mask = (1 << extra_pop) - 1;
20233 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20234 }
20235
20236 /* The prolog may have pushed some high registers to use as
20237 work registers. e.g. the testsuite file:
20238 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20239 compiles to produce:
20240 push {r4, r5, r6, r7, lr}
20241 mov r7, r9
20242 mov r6, r8
20243 push {r6, r7}
20244 as part of the prolog. We have to undo that pushing here. */
20245
20246 if (high_regs_pushed)
20247 {
20248 unsigned long mask = live_regs_mask & 0xff;
20249 int next_hi_reg;
20250
20251 /* The available low registers depend on the size of the value we are
20252 returning. */
20253 if (size <= 12)
20254 mask |= 1 << 3;
20255 if (size <= 8)
20256 mask |= 1 << 2;
20257
20258 if (mask == 0)
20259 /* Oh dear! We have no low registers into which we can pop
20260 high registers! */
20261 internal_error
20262 ("no low registers available for popping high registers");
20263
20264 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20265 if (live_regs_mask & (1 << next_hi_reg))
20266 break;
20267
20268 while (high_regs_pushed)
20269 {
20270 /* Find lo register(s) into which the high register(s) can
20271 be popped. */
20272 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20273 {
20274 if (mask & (1 << regno))
20275 high_regs_pushed--;
20276 if (high_regs_pushed == 0)
20277 break;
20278 }
20279
20280 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20281
20282 /* Pop the values into the low register(s). */
20283 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20284
20285 /* Move the value(s) into the high registers. */
20286 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20287 {
20288 if (mask & (1 << regno))
20289 {
20290 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20291 regno);
20292
20293 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20294 if (live_regs_mask & (1 << next_hi_reg))
20295 break;
20296 }
20297 }
20298 }
20299 live_regs_mask &= ~0x0f00;
20300 }
20301
20302 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20303 live_regs_mask &= 0xff;
20304
20305 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20306 {
20307 /* Pop the return address into the PC. */
20308 if (had_to_push_lr)
20309 live_regs_mask |= 1 << PC_REGNUM;
20310
20311 /* Either no argument registers were pushed or a backtrace
20312 structure was created which includes an adjusted stack
20313 pointer, so just pop everything. */
20314 if (live_regs_mask)
20315 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20316 live_regs_mask);
20317
20318 /* We have either just popped the return address into the
20319 PC or it was kept in LR for the entire function.
20320 Note that thumb_pushpop has already called thumb_exit if the
20321 PC was in the list. */
20322 if (!had_to_push_lr)
20323 thumb_exit (asm_out_file, LR_REGNUM);
20324 }
20325 else
20326 {
20327 /* Pop everything but the return address. */
20328 if (live_regs_mask)
20329 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20330 live_regs_mask);
20331
20332 if (had_to_push_lr)
20333 {
20334 if (size > 12)
20335 {
20336 /* We have no free low regs, so save one. */
20337 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20338 LAST_ARG_REGNUM);
20339 }
20340
20341 /* Get the return address into a temporary register. */
20342 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20343 1 << LAST_ARG_REGNUM);
20344
20345 if (size > 12)
20346 {
20347 /* Move the return address to lr. */
20348 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20349 LAST_ARG_REGNUM);
20350 /* Restore the low register. */
20351 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20352 IP_REGNUM);
20353 regno = LR_REGNUM;
20354 }
20355 else
20356 regno = LAST_ARG_REGNUM;
20357 }
20358 else
20359 regno = LR_REGNUM;
20360
20361 /* Remove the argument registers that were pushed onto the stack. */
20362 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20363 SP_REGNUM, SP_REGNUM,
20364 crtl->args.pretend_args_size);
20365
20366 thumb_exit (asm_out_file, regno);
20367 }
20368
20369 return "";
20370 }
20371
20372 /* Functions to save and restore machine-specific function data. */
20373 static struct machine_function *
20374 arm_init_machine_status (void)
20375 {
20376 struct machine_function *machine;
20377 machine = ggc_alloc_cleared_machine_function ();
20378
20379 #if ARM_FT_UNKNOWN != 0
20380 machine->func_type = ARM_FT_UNKNOWN;
20381 #endif
20382 return machine;
20383 }
20384
20385 /* Return an RTX indicating where the return address to the
20386 calling function can be found. */
20387 rtx
20388 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20389 {
20390 if (count != 0)
20391 return NULL_RTX;
20392
20393 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20394 }
20395
20396 /* Do anything needed before RTL is emitted for each function. */
20397 void
20398 arm_init_expanders (void)
20399 {
20400 /* Arrange to initialize and mark the machine per-function status. */
20401 init_machine_status = arm_init_machine_status;
20402
20403 /* This is to stop the combine pass optimizing away the alignment
20404 adjustment of va_arg. */
20405 /* ??? It is claimed that this should not be necessary. */
20406 if (cfun)
20407 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20408 }
20409
20410
20411 /* Like arm_compute_initial_elimination_offset. Simpler because there
20412 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20413 to point at the base of the local variables after static stack
20414 space for a function has been allocated. */
20415
20416 HOST_WIDE_INT
20417 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20418 {
20419 arm_stack_offsets *offsets;
20420
20421 offsets = arm_get_frame_offsets ();
20422
20423 switch (from)
20424 {
20425 case ARG_POINTER_REGNUM:
20426 switch (to)
20427 {
20428 case STACK_POINTER_REGNUM:
20429 return offsets->outgoing_args - offsets->saved_args;
20430
20431 case FRAME_POINTER_REGNUM:
20432 return offsets->soft_frame - offsets->saved_args;
20433
20434 case ARM_HARD_FRAME_POINTER_REGNUM:
20435 return offsets->saved_regs - offsets->saved_args;
20436
20437 case THUMB_HARD_FRAME_POINTER_REGNUM:
20438 return offsets->locals_base - offsets->saved_args;
20439
20440 default:
20441 gcc_unreachable ();
20442 }
20443 break;
20444
20445 case FRAME_POINTER_REGNUM:
20446 switch (to)
20447 {
20448 case STACK_POINTER_REGNUM:
20449 return offsets->outgoing_args - offsets->soft_frame;
20450
20451 case ARM_HARD_FRAME_POINTER_REGNUM:
20452 return offsets->saved_regs - offsets->soft_frame;
20453
20454 case THUMB_HARD_FRAME_POINTER_REGNUM:
20455 return offsets->locals_base - offsets->soft_frame;
20456
20457 default:
20458 gcc_unreachable ();
20459 }
20460 break;
20461
20462 default:
20463 gcc_unreachable ();
20464 }
20465 }
20466
20467 /* Generate the rest of a function's prologue. */
20468 void
20469 thumb1_expand_prologue (void)
20470 {
20471 rtx insn, dwarf;
20472
20473 HOST_WIDE_INT amount;
20474 arm_stack_offsets *offsets;
20475 unsigned long func_type;
20476 int regno;
20477 unsigned long live_regs_mask;
20478
20479 func_type = arm_current_func_type ();
20480
20481 /* Naked functions don't have prologues. */
20482 if (IS_NAKED (func_type))
20483 return;
20484
20485 if (IS_INTERRUPT (func_type))
20486 {
20487 error ("interrupt Service Routines cannot be coded in Thumb mode");
20488 return;
20489 }
20490
20491 offsets = arm_get_frame_offsets ();
20492 live_regs_mask = offsets->saved_regs_mask;
20493 /* Load the pic register before setting the frame pointer,
20494 so we can use r7 as a temporary work register. */
20495 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20496 arm_load_pic_register (live_regs_mask);
20497
20498 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20499 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20500 stack_pointer_rtx);
20501
20502 amount = offsets->outgoing_args - offsets->saved_regs;
20503 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20504 if (amount)
20505 {
20506 if (amount < 512)
20507 {
20508 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20509 GEN_INT (- amount)));
20510 RTX_FRAME_RELATED_P (insn) = 1;
20511 }
20512 else
20513 {
20514 rtx reg;
20515
20516 /* The stack decrement is too big for an immediate value in a single
20517 insn. In theory we could issue multiple subtracts, but after
20518 three of them it becomes more space efficient to place the full
20519 value in the constant pool and load into a register. (Also the
20520 ARM debugger really likes to see only one stack decrement per
20521 function). So instead we look for a scratch register into which
20522 we can load the decrement, and then we subtract this from the
20523 stack pointer. Unfortunately on the thumb the only available
20524 scratch registers are the argument registers, and we cannot use
20525 these as they may hold arguments to the function. Instead we
20526 attempt to locate a call preserved register which is used by this
20527 function. If we can find one, then we know that it will have
20528 been pushed at the start of the prologue and so we can corrupt
20529 it now. */
20530 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20531 if (live_regs_mask & (1 << regno))
20532 break;
20533
20534 gcc_assert (regno <= LAST_LO_REGNUM);
20535
20536 reg = gen_rtx_REG (SImode, regno);
20537
20538 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20539
20540 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20541 stack_pointer_rtx, reg));
20542 RTX_FRAME_RELATED_P (insn) = 1;
20543 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20544 plus_constant (stack_pointer_rtx,
20545 -amount));
20546 RTX_FRAME_RELATED_P (dwarf) = 1;
20547 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20548 }
20549 }
20550
20551 if (frame_pointer_needed)
20552 thumb_set_frame_pointer (offsets);
20553
20554 /* If we are profiling, make sure no instructions are scheduled before
20555 the call to mcount. Similarly if the user has requested no
20556 scheduling in the prolog. Similarly if we want non-call exceptions
20557 using the EABI unwinder, to prevent faulting instructions from being
20558 swapped with a stack adjustment. */
20559 if (crtl->profile || !TARGET_SCHED_PROLOG
20560 || (arm_except_unwind_info () == UI_TARGET
20561 && cfun->can_throw_non_call_exceptions))
20562 emit_insn (gen_blockage ());
20563
20564 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20565 if (live_regs_mask & 0xff)
20566 cfun->machine->lr_save_eliminated = 0;
20567 }
20568
20569
20570 void
20571 thumb1_expand_epilogue (void)
20572 {
20573 HOST_WIDE_INT amount;
20574 arm_stack_offsets *offsets;
20575 int regno;
20576
20577 /* Naked functions don't have epilogues. */
20578 if (IS_NAKED (arm_current_func_type ()))
20579 return;
20580
20581 offsets = arm_get_frame_offsets ();
20582 amount = offsets->outgoing_args - offsets->saved_regs;
20583
20584 if (frame_pointer_needed)
20585 {
20586 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20587 amount = offsets->locals_base - offsets->saved_regs;
20588 }
20589 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20590
20591 gcc_assert (amount >= 0);
20592 if (amount)
20593 {
20594 if (amount < 512)
20595 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20596 GEN_INT (amount)));
20597 else
20598 {
20599 /* r3 is always free in the epilogue. */
20600 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20601
20602 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20603 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20604 }
20605 }
20606
20607 /* Emit a USE (stack_pointer_rtx), so that
20608 the stack adjustment will not be deleted. */
20609 emit_insn (gen_prologue_use (stack_pointer_rtx));
20610
20611 if (crtl->profile || !TARGET_SCHED_PROLOG)
20612 emit_insn (gen_blockage ());
20613
20614 /* Emit a clobber for each insn that will be restored in the epilogue,
20615 so that flow2 will get register lifetimes correct. */
20616 for (regno = 0; regno < 13; regno++)
20617 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20618 emit_clobber (gen_rtx_REG (SImode, regno));
20619
20620 if (! df_regs_ever_live_p (LR_REGNUM))
20621 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20622 }
20623
20624 static void
20625 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20626 {
20627 arm_stack_offsets *offsets;
20628 unsigned long live_regs_mask = 0;
20629 unsigned long l_mask;
20630 unsigned high_regs_pushed = 0;
20631 int cfa_offset = 0;
20632 int regno;
20633
20634 if (IS_NAKED (arm_current_func_type ()))
20635 return;
20636
20637 if (is_called_in_ARM_mode (current_function_decl))
20638 {
20639 const char * name;
20640
20641 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20642 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20643 == SYMBOL_REF);
20644 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20645
20646 /* Generate code sequence to switch us into Thumb mode. */
20647 /* The .code 32 directive has already been emitted by
20648 ASM_DECLARE_FUNCTION_NAME. */
20649 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20650 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20651
20652 /* Generate a label, so that the debugger will notice the
20653 change in instruction sets. This label is also used by
20654 the assembler to bypass the ARM code when this function
20655 is called from a Thumb encoded function elsewhere in the
20656 same file. Hence the definition of STUB_NAME here must
20657 agree with the definition in gas/config/tc-arm.c. */
20658
20659 #define STUB_NAME ".real_start_of"
20660
20661 fprintf (f, "\t.code\t16\n");
20662 #ifdef ARM_PE
20663 if (arm_dllexport_name_p (name))
20664 name = arm_strip_name_encoding (name);
20665 #endif
20666 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20667 fprintf (f, "\t.thumb_func\n");
20668 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20669 }
20670
20671 if (crtl->args.pretend_args_size)
20672 {
20673 /* Output unwind directive for the stack adjustment. */
20674 if (arm_except_unwind_info () == UI_TARGET)
20675 fprintf (f, "\t.pad #%d\n",
20676 crtl->args.pretend_args_size);
20677
20678 if (cfun->machine->uses_anonymous_args)
20679 {
20680 int num_pushes;
20681
20682 fprintf (f, "\tpush\t{");
20683
20684 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20685
20686 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20687 regno <= LAST_ARG_REGNUM;
20688 regno++)
20689 asm_fprintf (f, "%r%s", regno,
20690 regno == LAST_ARG_REGNUM ? "" : ", ");
20691
20692 fprintf (f, "}\n");
20693 }
20694 else
20695 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20696 SP_REGNUM, SP_REGNUM,
20697 crtl->args.pretend_args_size);
20698
20699 /* We don't need to record the stores for unwinding (would it
20700 help the debugger any if we did?), but record the change in
20701 the stack pointer. */
20702 if (dwarf2out_do_frame ())
20703 {
20704 char *l = dwarf2out_cfi_label (false);
20705
20706 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20707 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20708 }
20709 }
20710
20711 /* Get the registers we are going to push. */
20712 offsets = arm_get_frame_offsets ();
20713 live_regs_mask = offsets->saved_regs_mask;
20714 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20715 l_mask = live_regs_mask & 0x40ff;
20716 /* Then count how many other high registers will need to be pushed. */
20717 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20718
20719 if (TARGET_BACKTRACE)
20720 {
20721 unsigned offset;
20722 unsigned work_register;
20723
20724 /* We have been asked to create a stack backtrace structure.
20725 The code looks like this:
20726
20727 0 .align 2
20728 0 func:
20729 0 sub SP, #16 Reserve space for 4 registers.
20730 2 push {R7} Push low registers.
20731 4 add R7, SP, #20 Get the stack pointer before the push.
20732 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20733 8 mov R7, PC Get hold of the start of this code plus 12.
20734 10 str R7, [SP, #16] Store it.
20735 12 mov R7, FP Get hold of the current frame pointer.
20736 14 str R7, [SP, #4] Store it.
20737 16 mov R7, LR Get hold of the current return address.
20738 18 str R7, [SP, #12] Store it.
20739 20 add R7, SP, #16 Point at the start of the backtrace structure.
20740 22 mov FP, R7 Put this value into the frame pointer. */
20741
20742 work_register = thumb_find_work_register (live_regs_mask);
20743
20744 if (arm_except_unwind_info () == UI_TARGET)
20745 asm_fprintf (f, "\t.pad #16\n");
20746
20747 asm_fprintf
20748 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20749 SP_REGNUM, SP_REGNUM);
20750
20751 if (dwarf2out_do_frame ())
20752 {
20753 char *l = dwarf2out_cfi_label (false);
20754
20755 cfa_offset = cfa_offset + 16;
20756 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20757 }
20758
20759 if (l_mask)
20760 {
20761 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20762 offset = bit_count (l_mask) * UNITS_PER_WORD;
20763 }
20764 else
20765 offset = 0;
20766
20767 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20768 offset + 16 + crtl->args.pretend_args_size);
20769
20770 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20771 offset + 4);
20772
20773 /* Make sure that the instruction fetching the PC is in the right place
20774 to calculate "start of backtrace creation code + 12". */
20775 if (l_mask)
20776 {
20777 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20778 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20779 offset + 12);
20780 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20781 ARM_HARD_FRAME_POINTER_REGNUM);
20782 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20783 offset);
20784 }
20785 else
20786 {
20787 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20788 ARM_HARD_FRAME_POINTER_REGNUM);
20789 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20790 offset);
20791 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20792 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20793 offset + 12);
20794 }
20795
20796 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20797 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20798 offset + 8);
20799 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20800 offset + 12);
20801 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20802 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20803 }
20804 /* Optimization: If we are not pushing any low registers but we are going
20805 to push some high registers then delay our first push. This will just
20806 be a push of LR and we can combine it with the push of the first high
20807 register. */
20808 else if ((l_mask & 0xff) != 0
20809 || (high_regs_pushed == 0 && l_mask))
20810 {
20811 unsigned long mask = l_mask;
20812 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20813 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20814 }
20815
20816 if (high_regs_pushed)
20817 {
20818 unsigned pushable_regs;
20819 unsigned next_hi_reg;
20820
20821 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20822 if (live_regs_mask & (1 << next_hi_reg))
20823 break;
20824
20825 pushable_regs = l_mask & 0xff;
20826
20827 if (pushable_regs == 0)
20828 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20829
20830 while (high_regs_pushed > 0)
20831 {
20832 unsigned long real_regs_mask = 0;
20833
20834 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20835 {
20836 if (pushable_regs & (1 << regno))
20837 {
20838 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20839
20840 high_regs_pushed --;
20841 real_regs_mask |= (1 << next_hi_reg);
20842
20843 if (high_regs_pushed)
20844 {
20845 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20846 next_hi_reg --)
20847 if (live_regs_mask & (1 << next_hi_reg))
20848 break;
20849 }
20850 else
20851 {
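/* Only REGNO and the low registers above it received high register
   values; drop the unused lower registers from the push mask.  */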
20852 pushable_regs &= ~((1 << regno) - 1);
20853 break;
20854 }
20855 }
20856 }
20857
20858 /* If we had to find a work register and we have not yet
20859 saved the LR then add it to the list of regs to push. */
20860 if (l_mask == (1 << LR_REGNUM))
20861 {
20862 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20863 1, &cfa_offset,
20864 real_regs_mask | (1 << LR_REGNUM));
20865 l_mask = 0;
20866 }
20867 else
20868 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
20869 }
20870 }
20871 }
20872
20873 /* Handle the case of a double word load into a low register from
20874 a computed memory address. The computed address may involve a
20875 register which is overwritten by the load. */
20876 const char *
20877 thumb_load_double_from_address (rtx *operands)
20878 {
20879 rtx addr;
20880 rtx base;
20881 rtx offset;
20882 rtx arg1;
20883 rtx arg2;
20884
20885 gcc_assert (GET_CODE (operands[0]) == REG);
20886 gcc_assert (GET_CODE (operands[1]) == MEM);
20887
20888 /* Get the memory address. */
20889 addr = XEXP (operands[1], 0);
20890
20891 /* Work out how the memory address is computed. */
20892 switch (GET_CODE (addr))
20893 {
20894 case REG:
20895 operands[2] = adjust_address (operands[1], SImode, 4);
20896
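/* If the address register is also the low destination register, load
   the high word first so the address survives for the second load.  */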
20897 if (REGNO (operands[0]) == REGNO (addr))
20898 {
20899 output_asm_insn ("ldr\t%H0, %2", operands);
20900 output_asm_insn ("ldr\t%0, %1", operands);
20901 }
20902 else
20903 {
20904 output_asm_insn ("ldr\t%0, %1", operands);
20905 output_asm_insn ("ldr\t%H0, %2", operands);
20906 }
20907 break;
20908
20909 case CONST:
20910 /* Compute <address> + 4 for the high order load. */
20911 operands[2] = adjust_address (operands[1], SImode, 4);
20912
20913 output_asm_insn ("ldr\t%0, %1", operands);
20914 output_asm_insn ("ldr\t%H0, %2", operands);
20915 break;
20916
20917 case PLUS:
20918 arg1 = XEXP (addr, 0);
20919 arg2 = XEXP (addr, 1);
20920
20921 if (CONSTANT_P (arg1))
20922 base = arg2, offset = arg1;
20923 else
20924 base = arg1, offset = arg2;
20925
20926 gcc_assert (GET_CODE (base) == REG);
20927
20928 /* Catch the case of <address> = <reg> + <reg> */
20929 if (GET_CODE (offset) == REG)
20930 {
20931 int reg_offset = REGNO (offset);
20932 int reg_base = REGNO (base);
20933 int reg_dest = REGNO (operands[0]);
20934
20935 /* Add the base and offset registers together into the
20936 higher destination register. */
20937 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20938 reg_dest + 1, reg_base, reg_offset);
20939
20940 /* Load the lower destination register from the address in
20941 the higher destination register. */
20942 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20943 reg_dest, reg_dest + 1);
20944
20945 /* Load the higher destination register from its own address
20946 plus 4. */
20947 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20948 reg_dest + 1, reg_dest + 1);
20949 }
20950 else
20951 {
20952 /* Compute <address> + 4 for the high order load. */
20953 operands[2] = adjust_address (operands[1], SImode, 4);
20954
20955 /* If the computed address is held in the low order register
20956 then load the high order register first, otherwise always
20957 load the low order register first. */
20958 if (REGNO (operands[0]) == REGNO (base))
20959 {
20960 output_asm_insn ("ldr\t%H0, %2", operands);
20961 output_asm_insn ("ldr\t%0, %1", operands);
20962 }
20963 else
20964 {
20965 output_asm_insn ("ldr\t%0, %1", operands);
20966 output_asm_insn ("ldr\t%H0, %2", operands);
20967 }
20968 }
20969 break;
20970
20971 case LABEL_REF:
20972 /* With no registers to worry about we can just load the value
20973 directly. */
20974 operands[2] = adjust_address (operands[1], SImode, 4);
20975
20976 output_asm_insn ("ldr\t%H0, %2", operands);
20977 output_asm_insn ("ldr\t%0, %1", operands);
20978 break;
20979
20980 default:
20981 gcc_unreachable ();
20982 }
20983
20984 return "";
20985 }
20986
20987 const char *
20988 thumb_output_move_mem_multiple (int n, rtx *operands)
20989 {
20990 rtx tmp;
20991
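/* The register lists of the ldmia/stmia instructions below must be in
   ascending register order, so sort the scratch registers first.  */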
20992 switch (n)
20993 {
20994 case 2:
20995 if (REGNO (operands[4]) > REGNO (operands[5]))
20996 {
20997 tmp = operands[4];
20998 operands[4] = operands[5];
20999 operands[5] = tmp;
21000 }
21001 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21002 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21003 break;
21004
21005 case 3:
21006 if (REGNO (operands[4]) > REGNO (operands[5]))
21007 {
21008 tmp = operands[4];
21009 operands[4] = operands[5];
21010 operands[5] = tmp;
21011 }
21012 if (REGNO (operands[5]) > REGNO (operands[6]))
21013 {
21014 tmp = operands[5];
21015 operands[5] = operands[6];
21016 operands[6] = tmp;
21017 }
21018 if (REGNO (operands[4]) > REGNO (operands[5]))
21019 {
21020 tmp = operands[4];
21021 operands[4] = operands[5];
21022 operands[5] = tmp;
21023 }
21024
21025 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21026 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21027 break;
21028
21029 default:
21030 gcc_unreachable ();
21031 }
21032
21033 return "";
21034 }
21035
21036 /* Output a call-via instruction for thumb state. */
21037 const char *
21038 thumb_call_via_reg (rtx reg)
21039 {
21040 int regno = REGNO (reg);
21041 rtx *labelp;
21042
21043 gcc_assert (regno < LR_REGNUM);
21044
21045 /* If we are in the normal text section we can use a single instance
21046 per compilation unit. If we are doing function sections, then we need
21047 an entry per section, since we can't rely on reachability. */
21048 if (in_section == text_section)
21049 {
21050 thumb_call_reg_needed = 1;
21051
21052 if (thumb_call_via_label[regno] == NULL)
21053 thumb_call_via_label[regno] = gen_label_rtx ();
21054 labelp = thumb_call_via_label + regno;
21055 }
21056 else
21057 {
21058 if (cfun->machine->call_via[regno] == NULL)
21059 cfun->machine->call_via[regno] = gen_label_rtx ();
21060 labelp = cfun->machine->call_via + regno;
21061 }
21062
21063 output_asm_insn ("bl\t%a0", labelp);
21064 return "";
21065 }
21066
21067 /* Routines for generating rtl. */
21068 void
21069 thumb_expand_movmemqi (rtx *operands)
21070 {
21071 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21072 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21073 HOST_WIDE_INT len = INTVAL (operands[2]);
21074 HOST_WIDE_INT offset = 0;
21075
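/* Copy the block in the largest chunks first: 12-byte and 8-byte
   ldmia/stmia groups, then a word, a halfword and finally a single byte
   for any remainder.  */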
21076 while (len >= 12)
21077 {
21078 emit_insn (gen_movmem12b (out, in, out, in));
21079 len -= 12;
21080 }
21081
21082 if (len >= 8)
21083 {
21084 emit_insn (gen_movmem8b (out, in, out, in));
21085 len -= 8;
21086 }
21087
21088 if (len >= 4)
21089 {
21090 rtx reg = gen_reg_rtx (SImode);
21091 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21092 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21093 len -= 4;
21094 offset += 4;
21095 }
21096
21097 if (len >= 2)
21098 {
21099 rtx reg = gen_reg_rtx (HImode);
21100 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21101 plus_constant (in, offset))));
21102 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21103 reg));
21104 len -= 2;
21105 offset += 2;
21106 }
21107
21108 if (len)
21109 {
21110 rtx reg = gen_reg_rtx (QImode);
21111 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21112 plus_constant (in, offset))));
21113 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21114 reg));
21115 }
21116 }
21117
21118 void
21119 thumb_reload_out_hi (rtx *operands)
21120 {
21121 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21122 }
21123
21124 /* Handle reading a half-word from memory during reload. */
21125 void
21126 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21127 {
21128 gcc_unreachable ();
21129 }
21130
21131 /* Return the length of a function name prefix
21132 that starts with the character 'c'. */
21133 static int
21134 arm_get_strip_length (int c)
21135 {
21136 switch (c)
21137 {
21138 ARM_NAME_ENCODING_LENGTHS
21139 default: return 0;
21140 }
21141 }
21142
21143 /* Return a pointer to a function's name with any
21144 and all prefix encodings stripped from it. */
21145 const char *
21146 arm_strip_name_encoding (const char *name)
21147 {
21148 int skip;
21149
21150 while ((skip = arm_get_strip_length (* name)))
21151 name += skip;
21152
21153 return name;
21154 }
21155
21156 /* If there is a '*' anywhere in the name's prefix, then
21157 emit the stripped name verbatim, otherwise prepend an
21158 underscore if leading underscores are being used. */
21159 void
21160 arm_asm_output_labelref (FILE *stream, const char *name)
21161 {
21162 int skip;
21163 int verbatim = 0;
21164
21165 while ((skip = arm_get_strip_length (* name)))
21166 {
21167 verbatim |= (*name == '*');
21168 name += skip;
21169 }
21170
21171 if (verbatim)
21172 fputs (name, stream);
21173 else
21174 asm_fprintf (stream, "%U%s", name);
21175 }
21176
21177 static void
21178 arm_file_start (void)
21179 {
21180 int val;
21181
21182 if (TARGET_UNIFIED_ASM)
21183 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21184
21185 if (TARGET_BPABI)
21186 {
21187 const char *fpu_name;
21188 if (arm_selected_arch)
21189 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21190 else
21191 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21192
21193 if (TARGET_SOFT_FLOAT)
21194 {
21195 if (TARGET_VFP)
21196 fpu_name = "softvfp";
21197 else
21198 fpu_name = "softfpa";
21199 }
21200 else
21201 {
21202 fpu_name = arm_fpu_desc->name;
21203 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21204 {
21205 if (TARGET_HARD_FLOAT)
21206 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21207 if (TARGET_HARD_FLOAT_ABI)
21208 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21209 }
21210 }
21211 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21212
21213 /* Some of these attributes only apply when the corresponding features
21214 are used. However we don't have any easy way of figuring this out.
21215 Conservatively record the setting that would have been used. */
21216
21217 /* Tag_ABI_FP_rounding. */
21218 if (flag_rounding_math)
21219 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21220 if (!flag_unsafe_math_optimizations)
21221 {
21222 /* Tag_ABI_FP_denormal. */
21223 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21224 /* Tag_ABI_FP_exceptions. */
21225 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21226 }
21227 /* Tag_ABI_FP_user_exceptions. */
21228 if (flag_signaling_nans)
21229 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21230 /* Tag_ABI_FP_number_model. */
21231 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21232 flag_finite_math_only ? 1 : 3);
21233
21234 /* Tag_ABI_align8_needed. */
21235 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21236 /* Tag_ABI_align8_preserved. */
21237 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21238 /* Tag_ABI_enum_size. */
21239 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21240 flag_short_enums ? 1 : 2);
21241
21242 /* Tag_ABI_optimization_goals. */
21243 if (optimize_size)
21244 val = 4;
21245 else if (optimize >= 2)
21246 val = 2;
21247 else if (optimize)
21248 val = 1;
21249 else
21250 val = 6;
21251 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21252
21253 /* Tag_ABI_FP_16bit_format. */
21254 if (arm_fp16_format)
21255 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21256 (int)arm_fp16_format);
21257
21258 if (arm_lang_output_object_attributes_hook)
21259 arm_lang_output_object_attributes_hook();
21260 }
21261 default_file_start();
21262 }
21263
21264 static void
21265 arm_file_end (void)
21266 {
21267 int regno;
21268
21269 if (NEED_INDICATE_EXEC_STACK)
21270 /* Add .note.GNU-stack. */
21271 file_end_indicate_exec_stack ();
21272
21273 if (! thumb_call_reg_needed)
21274 return;
21275
21276 switch_to_section (text_section);
21277 asm_fprintf (asm_out_file, "\t.code 16\n");
21278 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21279
21280 for (regno = 0; regno < LR_REGNUM; regno++)
21281 {
21282 rtx label = thumb_call_via_label[regno];
21283
21284 if (label != 0)
21285 {
21286 targetm.asm_out.internal_label (asm_out_file, "L",
21287 CODE_LABEL_NUMBER (label));
21288 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21289 }
21290 }
21291 }
21292
21293 #ifndef ARM_PE
21294 /* Symbols in the text segment can be accessed without indirecting via the
21295 constant pool; it may take an extra binary operation, but this is still
21296 faster than indirecting via memory. Don't do this when not optimizing,
21297 since we won't be calculating all of the offsets necessary to do this
21298 simplification. */
21299
21300 static void
21301 arm_encode_section_info (tree decl, rtx rtl, int first)
21302 {
21303 if (optimize > 0 && TREE_CONSTANT (decl))
21304 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21305
21306 default_encode_section_info (decl, rtl, first);
21307 }
21308 #endif /* !ARM_PE */
21309
21310 static void
21311 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21312 {
21313 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21314 && !strcmp (prefix, "L"))
21315 {
21316 arm_ccfsm_state = 0;
21317 arm_target_insn = NULL;
21318 }
21319 default_internal_label (stream, prefix, labelno);
21320 }
21321
21322 /* Output code to add DELTA to the first argument, and then jump
21323 to FUNCTION. Used for C++ multiple inheritance. */
21324 static void
21325 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21326 HOST_WIDE_INT delta,
21327 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21328 tree function)
21329 {
21330 static int thunk_label = 0;
21331 char label[256];
21332 char labelpc[256];
21333 int mi_delta = delta;
21334 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21335 int shift = 0;
21336 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21337 ? 1 : 0);
21338 if (mi_delta < 0)
21339 mi_delta = - mi_delta;
21340
21341 if (TARGET_THUMB1)
21342 {
21343 int labelno = thunk_label++;
21344 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21345 /* Thunks are entered in ARM mode when available. */
21346 if (TARGET_THUMB1_ONLY)
21347 {
21348 /* push r3 so we can use it as a temporary. */
21349 /* TODO: Omit this save if r3 is not used. */
21350 fputs ("\tpush {r3}\n", file);
21351 fputs ("\tldr\tr3, ", file);
21352 }
21353 else
21354 {
21355 fputs ("\tldr\tr12, ", file);
21356 }
21357 assemble_name (file, label);
21358 fputc ('\n', file);
21359 if (flag_pic)
21360 {
21361 /* If we are generating PIC, the ldr instruction below loads
21362 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21363 the address of the add + 8, so we have:
21364
21365 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21366 = target + 1.
21367
21368 Note that we have "+ 1" because some versions of GNU ld
21369 don't set the low bit of the result for R_ARM_REL32
21370 relocations against thumb function symbols.
21371 On ARMv6M this is +4, not +8. */
21372 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21373 assemble_name (file, labelpc);
21374 fputs (":\n", file);
21375 if (TARGET_THUMB1_ONLY)
21376 {
21377 /* This is 2 insns after the start of the thunk, so we know it
21378 is 4-byte aligned. */
21379 fputs ("\tadd\tr3, pc, r3\n", file);
21380 fputs ("\tmov r12, r3\n", file);
21381 }
21382 else
21383 fputs ("\tadd\tr12, pc, r12\n", file);
21384 }
21385 else if (TARGET_THUMB1_ONLY)
21386 fputs ("\tmov r12, r3\n", file);
21387 }
21388 if (TARGET_THUMB1_ONLY)
21389 {
21390 if (mi_delta > 255)
21391 {
21392 fputs ("\tldr\tr3, ", file);
21393 assemble_name (file, label);
21394 fputs ("+4\n", file);
21395 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21396 mi_op, this_regno, this_regno);
21397 }
21398 else if (mi_delta != 0)
21399 {
21400 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21401 mi_op, this_regno, this_regno,
21402 mi_delta);
21403 }
21404 }
21405 else
21406 {
21407 /* TODO: Use movw/movt for large constants when available. */
21408 while (mi_delta != 0)
21409 {
21410 if ((mi_delta & (3 << shift)) == 0)
21411 shift += 2;
21412 else
21413 {
21414 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21415 mi_op, this_regno, this_regno,
21416 mi_delta & (0xff << shift));
21417 mi_delta &= ~(0xff << shift);
21418 shift += 8;
21419 }
21420 }
21421 }
21422 if (TARGET_THUMB1)
21423 {
21424 if (TARGET_THUMB1_ONLY)
21425 fputs ("\tpop\t{r3}\n", file);
21426
21427 fprintf (file, "\tbx\tr12\n");
21428 ASM_OUTPUT_ALIGN (file, 2);
21429 assemble_name (file, label);
21430 fputs (":\n", file);
21431 if (flag_pic)
21432 {
21433 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21434 rtx tem = XEXP (DECL_RTL (function), 0);
21435 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21436 tem = gen_rtx_MINUS (GET_MODE (tem),
21437 tem,
21438 gen_rtx_SYMBOL_REF (Pmode,
21439 ggc_strdup (labelpc)));
21440 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21441 }
21442 else
21443 /* Output ".word .LTHUNKn". */
21444 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21445
21446 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21447 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21448 }
21449 else
21450 {
21451 fputs ("\tb\t", file);
21452 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21453 if (NEED_PLT_RELOC)
21454 fputs ("(PLT)", file);
21455 fputc ('\n', file);
21456 }
21457 }
21458
21459 int
21460 arm_emit_vector_const (FILE *file, rtx x)
21461 {
21462 int i;
21463 const char * pattern;
21464
21465 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21466
21467 switch (GET_MODE (x))
21468 {
21469 case V2SImode: pattern = "%08x"; break;
21470 case V4HImode: pattern = "%04x"; break;
21471 case V8QImode: pattern = "%02x"; break;
21472 default: gcc_unreachable ();
21473 }
21474
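/* Emit the whole vector as one hexadecimal constant, highest-numbered
   element first, each element formatted to the width chosen above.  */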
21475 fprintf (file, "0x");
21476 for (i = CONST_VECTOR_NUNITS (x); i--;)
21477 {
21478 rtx element;
21479
21480 element = CONST_VECTOR_ELT (x, i);
21481 fprintf (file, pattern, INTVAL (element));
21482 }
21483
21484 return 1;
21485 }
21486
21487 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21488 HFmode constant pool entries are actually loaded with ldr. */
21489 void
21490 arm_emit_fp16_const (rtx c)
21491 {
21492 REAL_VALUE_TYPE r;
21493 long bits;
21494
21495 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21496 bits = real_to_target (NULL, &r, HFmode);
21497 if (WORDS_BIG_ENDIAN)
21498 assemble_zeros (2);
21499 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21500 if (!WORDS_BIG_ENDIAN)
21501 assemble_zeros (2);
21502 }
21503
21504 const char *
21505 arm_output_load_gr (rtx *operands)
21506 {
21507 rtx reg;
21508 rtx offset;
21509 rtx wcgr;
21510 rtx sum;
21511
21512 if (GET_CODE (operands [1]) != MEM
21513 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21514 || GET_CODE (reg = XEXP (sum, 0)) != REG
21515 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21516 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21517 return "wldrw%?\t%0, %1";
21518
21519 /* Fix up an out-of-range load of a GR register. */
21520 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21521 wcgr = operands[0];
21522 operands[0] = reg;
21523 output_asm_insn ("ldr%?\t%0, %1", operands);
21524
21525 operands[0] = wcgr;
21526 operands[1] = reg;
21527 output_asm_insn ("tmcr%?\t%0, %1", operands);
21528 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21529
21530 return "";
21531 }
21532
21533 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21534
21535 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21536 named arg and all anonymous args onto the stack.
21537 XXX I know the prologue shouldn't be pushing registers, but it is faster
21538 that way. */
21539
21540 static void
21541 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21542 enum machine_mode mode,
21543 tree type,
21544 int *pretend_size,
21545 int second_time ATTRIBUTE_UNUSED)
21546 {
21547 int nregs;
21548
21549 cfun->machine->uses_anonymous_args = 1;
21550 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21551 {
21552 nregs = pcum->aapcs_ncrn;
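/* The AAPCS requires a doubleword-aligned argument to start in an
   even-numbered core register, so skip a register if an odd number
   have been used so far.  */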
21553 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21554 nregs++;
21555 }
21556 else
21557 nregs = pcum->nregs;
21558
21559 if (nregs < NUM_ARG_REGS)
21560 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21561 }
21562
21563 /* Return nonzero if the CONSUMER instruction (a store) does not need
21564 PRODUCER's value to calculate the address. */
21565
21566 int
21567 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21568 {
21569 rtx value = PATTERN (producer);
21570 rtx addr = PATTERN (consumer);
21571
21572 if (GET_CODE (value) == COND_EXEC)
21573 value = COND_EXEC_CODE (value);
21574 if (GET_CODE (value) == PARALLEL)
21575 value = XVECEXP (value, 0, 0);
21576 value = XEXP (value, 0);
21577 if (GET_CODE (addr) == COND_EXEC)
21578 addr = COND_EXEC_CODE (addr);
21579 if (GET_CODE (addr) == PARALLEL)
21580 addr = XVECEXP (addr, 0, 0);
21581 addr = XEXP (addr, 0);
21582
21583 return !reg_overlap_mentioned_p (value, addr);
21584 }
21585
21586 /* Return nonzero if the CONSUMER instruction (a store) does need
21587 PRODUCER's value to calculate the address. */
21588
21589 int
21590 arm_early_store_addr_dep (rtx producer, rtx consumer)
21591 {
21592 return !arm_no_early_store_addr_dep (producer, consumer);
21593 }
21594
21595 /* Return nonzero if the CONSUMER instruction (a load) does need
21596 PRODUCER's value to calculate the address. */
21597
21598 int
21599 arm_early_load_addr_dep (rtx producer, rtx consumer)
21600 {
21601 rtx value = PATTERN (producer);
21602 rtx addr = PATTERN (consumer);
21603
21604 if (GET_CODE (value) == COND_EXEC)
21605 value = COND_EXEC_CODE (value);
21606 if (GET_CODE (value) == PARALLEL)
21607 value = XVECEXP (value, 0, 0);
21608 value = XEXP (value, 0);
21609 if (GET_CODE (addr) == COND_EXEC)
21610 addr = COND_EXEC_CODE (addr);
21611 if (GET_CODE (addr) == PARALLEL)
21612 addr = XVECEXP (addr, 0, 0);
21613 addr = XEXP (addr, 1);
21614
21615 return reg_overlap_mentioned_p (value, addr);
21616 }
21617
21618 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21619 have an early register shift value or amount dependency on the
21620 result of PRODUCER. */
21621
21622 int
21623 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21624 {
21625 rtx value = PATTERN (producer);
21626 rtx op = PATTERN (consumer);
21627 rtx early_op;
21628
21629 if (GET_CODE (value) == COND_EXEC)
21630 value = COND_EXEC_CODE (value);
21631 if (GET_CODE (value) == PARALLEL)
21632 value = XVECEXP (value, 0, 0);
21633 value = XEXP (value, 0);
21634 if (GET_CODE (op) == COND_EXEC)
21635 op = COND_EXEC_CODE (op);
21636 if (GET_CODE (op) == PARALLEL)
21637 op = XVECEXP (op, 0, 0);
21638 op = XEXP (op, 1);
21639
21640 early_op = XEXP (op, 0);
21641 /* This is either an actual independent shift, or a shift applied to
21642 the first operand of another operation. We want the whole shift
21643 operation. */
21644 if (GET_CODE (early_op) == REG)
21645 early_op = op;
21646
21647 return !reg_overlap_mentioned_p (value, early_op);
21648 }
21649
21650 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21651 have an early register shift value dependency on the result of
21652 PRODUCER. */
21653
21654 int
21655 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21656 {
21657 rtx value = PATTERN (producer);
21658 rtx op = PATTERN (consumer);
21659 rtx early_op;
21660
21661 if (GET_CODE (value) == COND_EXEC)
21662 value = COND_EXEC_CODE (value);
21663 if (GET_CODE (value) == PARALLEL)
21664 value = XVECEXP (value, 0, 0);
21665 value = XEXP (value, 0);
21666 if (GET_CODE (op) == COND_EXEC)
21667 op = COND_EXEC_CODE (op);
21668 if (GET_CODE (op) == PARALLEL)
21669 op = XVECEXP (op, 0, 0);
21670 op = XEXP (op, 1);
21671
21672 early_op = XEXP (op, 0);
21673
21674 /* This is either an actual independent shift, or a shift applied to
21675 the first operand of another operation. We want the value being
21676 shifted, in either case. */
21677 if (GET_CODE (early_op) != REG)
21678 early_op = XEXP (early_op, 0);
21679
21680 return !reg_overlap_mentioned_p (value, early_op);
21681 }
21682
21683 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21684 have an early register mult dependency on the result of
21685 PRODUCER. */
21686
21687 int
21688 arm_no_early_mul_dep (rtx producer, rtx consumer)
21689 {
21690 rtx value = PATTERN (producer);
21691 rtx op = PATTERN (consumer);
21692
21693 if (GET_CODE (value) == COND_EXEC)
21694 value = COND_EXEC_CODE (value);
21695 if (GET_CODE (value) == PARALLEL)
21696 value = XVECEXP (value, 0, 0);
21697 value = XEXP (value, 0);
21698 if (GET_CODE (op) == COND_EXEC)
21699 op = COND_EXEC_CODE (op);
21700 if (GET_CODE (op) == PARALLEL)
21701 op = XVECEXP (op, 0, 0);
21702 op = XEXP (op, 1);
21703
21704 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21705 {
21706 if (GET_CODE (XEXP (op, 0)) == MULT)
21707 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21708 else
21709 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21710 }
21711
21712 return 0;
21713 }
21714
21715 /* We can't rely on the caller doing the proper promotion when
21716 using APCS or ATPCS. */
21717
21718 static bool
21719 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21720 {
21721 return !TARGET_AAPCS_BASED;
21722 }
21723
21724 static enum machine_mode
21725 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21726 enum machine_mode mode,
21727 int *punsignedp ATTRIBUTE_UNUSED,
21728 const_tree fntype ATTRIBUTE_UNUSED,
21729 int for_return ATTRIBUTE_UNUSED)
21730 {
21731 if (GET_MODE_CLASS (mode) == MODE_INT
21732 && GET_MODE_SIZE (mode) < 4)
21733 return SImode;
21734
21735 return mode;
21736 }
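/* For example, a "char" or "short" argument or return value (QImode or
   HImode, both narrower than 4 bytes) is widened to SImode here, while
   SImode, DImode and the floating-point modes pass through unchanged.  */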
21737
21738 /* AAPCS based ABIs use short enums by default. */
21739
21740 static bool
21741 arm_default_short_enums (void)
21742 {
21743 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21744 }
21745
21746
21747 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21748
21749 static bool
21750 arm_align_anon_bitfield (void)
21751 {
21752 return TARGET_AAPCS_BASED;
21753 }
21754
21755
21756 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21757
21758 static tree
21759 arm_cxx_guard_type (void)
21760 {
21761 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21762 }
21763
21764 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21765 has an accumulator dependency on the result of the producer (a
21766 multiplication instruction) and no other dependency on that result. */
21767 int
21768 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21769 {
21770 rtx mul = PATTERN (producer);
21771 rtx mac = PATTERN (consumer);
21772 rtx mul_result;
21773 rtx mac_op0, mac_op1, mac_acc;
21774
21775 if (GET_CODE (mul) == COND_EXEC)
21776 mul = COND_EXEC_CODE (mul);
21777 if (GET_CODE (mac) == COND_EXEC)
21778 mac = COND_EXEC_CODE (mac);
21779
21780 /* Check that mul is of the form (set (...) (mult ...))
21781 and mla is of the form (set (...) (plus (mult ...) (...))). */
21782 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21783 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21784 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21785 return 0;
21786
21787 mul_result = XEXP (mul, 0);
21788 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21789 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21790 mac_acc = XEXP (XEXP (mac, 1), 1);
21791
21792 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21793 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21794 && !reg_overlap_mentioned_p (mul_result, mac_op1));
21795 }
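/* A sketch of the RTL shapes accepted above (register numbers are
   hypothetical):

	producer:  (set (reg 100) (mult (reg 101) (reg 102)))
	consumer:  (set (reg 103) (plus (mult (reg 104) (reg 105))
					(reg 100)))

   The multiply result (reg 100) feeds only the accumulator operand of
   the MAC, so nonzero is returned; if reg 100 also appeared as one of
   the MAC's multiply operands, the result would be zero.  */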
21796
21797
21798 /* The EABI says test the least significant bit of a guard variable. */
21799
21800 static bool
21801 arm_cxx_guard_mask_bit (void)
21802 {
21803 return TARGET_AAPCS_BASED;
21804 }
21805
21806
21807 /* The EABI specifies that all array cookies are 8 bytes long. */
21808
21809 static tree
21810 arm_get_cookie_size (tree type)
21811 {
21812 tree size;
21813
21814 if (!TARGET_AAPCS_BASED)
21815 return default_cxx_get_cookie_size (type);
21816
21817 size = build_int_cst (sizetype, 8);
21818 return size;
21819 }
21820
21821
21822 /* The EABI says that array cookies should also contain the element size. */
21823
21824 static bool
21825 arm_cookie_has_size (void)
21826 {
21827 return TARGET_AAPCS_BASED;
21828 }
21829
21830
21831 /* The EABI says constructors and destructors should return a pointer to
21832 the object constructed/destroyed. */
21833
21834 static bool
21835 arm_cxx_cdtor_returns_this (void)
21836 {
21837 return TARGET_AAPCS_BASED;
21838 }
21839
21840 /* The EABI says that an inline function may never be the key
21841 method. */
21842
21843 static bool
21844 arm_cxx_key_method_may_be_inline (void)
21845 {
21846 return !TARGET_AAPCS_BASED;
21847 }
21848
21849 static void
21850 arm_cxx_determine_class_data_visibility (tree decl)
21851 {
21852 if (!TARGET_AAPCS_BASED
21853 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21854 return;
21855
21856 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21857 is exported. However, on systems without dynamic vague linkage,
21858 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21859 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21860 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21861 else
21862 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21863 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21864 }
21865
21866 static bool
21867 arm_cxx_class_data_always_comdat (void)
21868 {
21869 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21870 vague linkage if the class has no key function. */
21871 return !TARGET_AAPCS_BASED;
21872 }
21873
21874
21875 /* The EABI says __aeabi_atexit should be used to register static
21876 destructors. */
21877
21878 static bool
21879 arm_cxx_use_aeabi_atexit (void)
21880 {
21881 return TARGET_AAPCS_BASED;
21882 }
21883
21884
21885 void
21886 arm_set_return_address (rtx source, rtx scratch)
21887 {
21888 arm_stack_offsets *offsets;
21889 HOST_WIDE_INT delta;
21890 rtx addr;
21891 unsigned long saved_regs;
21892
21893 offsets = arm_get_frame_offsets ();
21894 saved_regs = offsets->saved_regs_mask;
21895
21896 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21897 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21898 else
21899 {
21900 if (frame_pointer_needed)
21901 addr = plus_constant(hard_frame_pointer_rtx, -4);
21902 else
21903 {
21904 /* LR will be the first saved register. */
21905 delta = offsets->outgoing_args - (offsets->frame + 4);
21906
21907
21908 if (delta >= 4096)
21909 {
21910 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21911 GEN_INT (delta & ~4095)));
21912 addr = scratch;
21913 delta &= 4095;
21914 }
21915 else
21916 addr = stack_pointer_rtx;
21917
21918 addr = plus_constant (addr, delta);
21919 }
21920 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21921 }
21922 }
21923
21924
21925 void
21926 thumb_set_return_address (rtx source, rtx scratch)
21927 {
21928 arm_stack_offsets *offsets;
21929 HOST_WIDE_INT delta;
21930 HOST_WIDE_INT limit;
21931 int reg;
21932 rtx addr;
21933 unsigned long mask;
21934
21935 emit_use (source);
21936
21937 offsets = arm_get_frame_offsets ();
21938 mask = offsets->saved_regs_mask;
21939 if (mask & (1 << LR_REGNUM))
21940 {
21941 limit = 1024;
21942 /* Find the saved regs. */
21943 if (frame_pointer_needed)
21944 {
21945 delta = offsets->soft_frame - offsets->saved_args;
21946 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
21947 if (TARGET_THUMB1)
21948 limit = 128;
21949 }
21950 else
21951 {
21952 delta = offsets->outgoing_args - offsets->saved_args;
21953 reg = SP_REGNUM;
21954 }
21955 /* Allow for the stack frame. */
21956 if (TARGET_THUMB1 && TARGET_BACKTRACE)
21957 delta -= 16;
21958 /* The link register is always the first saved register. */
21959 delta -= 4;
21960
21961 /* Construct the address. */
21962 addr = gen_rtx_REG (SImode, reg);
21963 if (delta > limit)
21964 {
21965 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
21966 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
21967 addr = scratch;
21968 }
21969 else
21970 addr = plus_constant (addr, delta);
21971
21972 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21973 }
21974 else
21975 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21976 }
21977
21978 /* Implements target hook vector_mode_supported_p. */
21979 bool
21980 arm_vector_mode_supported_p (enum machine_mode mode)
21981 {
21982 /* Neon also supports V2SImode, etc. listed in the clause below. */
21983 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
21984 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
21985 return true;
21986
21987 if ((TARGET_NEON || TARGET_IWMMXT)
21988 && ((mode == V2SImode)
21989 || (mode == V4HImode)
21990 || (mode == V8QImode)))
21991 return true;
21992
21993 return false;
21994 }
21995
21996 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
21997 registers when autovectorizing for Neon, at least until multiple vector
21998 widths are supported properly by the middle-end. */
21999
22000 static enum machine_mode
22001 arm_preferred_simd_mode (enum machine_mode mode)
22002 {
22003 if (TARGET_NEON)
22004 switch (mode)
22005 {
22006 case SFmode:
22007 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22008 case SImode:
22009 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22010 case HImode:
22011 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22012 case QImode:
22013 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22014 case DImode:
22015 if (TARGET_NEON_VECTORIZE_QUAD)
22016 return V2DImode;
22017 break;
22018
22019 default:;
22020 }
22021
22022 if (TARGET_REALLY_IWMMXT)
22023 switch (mode)
22024 {
22025 case SImode:
22026 return V2SImode;
22027 case HImode:
22028 return V4HImode;
22029 case QImode:
22030 return V8QImode;
22031
22032 default:;
22033 }
22034
22035 return word_mode;
22036 }
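/* For instance, SImode data is vectorized as V2SImode (one 64-bit D
   register) by default on Neon, or V4SImode (one 128-bit Q register)
   under -mvectorize-with-neon-quad; iWMMXt targets only ever get the
   64-bit forms.  */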
22037
22038 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22039
22040 We need to define this for LO_REGS on Thumb. Otherwise we can end up
22041 using r0-r4 for function arguments and r7 for the stack frame, and not
22042 have enough left over to do doubleword arithmetic. */
22043
22044 static bool
22045 arm_class_likely_spilled_p (reg_class_t rclass)
22046 {
22047 if ((TARGET_THUMB && rclass == LO_REGS)
22048 || rclass == CC_REG)
22049 return true;
22050
22051 return false;
22052 }
22053
22054 /* Implements target hook small_register_classes_for_mode_p. */
22055 bool
22056 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22057 {
22058 return TARGET_THUMB1;
22059 }
22060
22061 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22062 ARM insns and therefore guarantee that the shift count is modulo 256.
22063 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22064 guarantee no particular behavior for out-of-range counts. */
22065
22066 static unsigned HOST_WIDE_INT
22067 arm_shift_truncation_mask (enum machine_mode mode)
22068 {
22069 return mode == SImode ? 255 : 0;
22070 }
22071
22072
22073 /* Map internal gcc register numbers to DWARF2 register numbers. */
22074
22075 unsigned int
22076 arm_dbx_register_number (unsigned int regno)
22077 {
22078 if (regno < 16)
22079 return regno;
22080
22081 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22082 compatibility. The EABI defines them as registers 96-103. */
22083 if (IS_FPA_REGNUM (regno))
22084 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22085
22086 if (IS_VFP_REGNUM (regno))
22087 {
22088 /* See comment in arm_dwarf_register_span. */
22089 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22090 return 64 + regno - FIRST_VFP_REGNUM;
22091 else
22092 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22093 }
22094
22095 if (IS_IWMMXT_GR_REGNUM (regno))
22096 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22097
22098 if (IS_IWMMXT_REGNUM (regno))
22099 return 112 + regno - FIRST_IWMMXT_REGNUM;
22100
22101 gcc_unreachable ();
22102 }
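/* A few concrete instances of the mapping above, stated in terms of
   the architectural register names (the GCC-internal numbers they
   correspond to depend on the target configuration): r7 stays 7,
   s3 maps to 64 + 3 = 67, d16 (which has no single-precision alias)
   maps to 256 + 16 = 272, wCGR0 maps to 104 and wR0 to 112.  */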
22103
22104 /* DWARF models VFPv3 registers as 32 64-bit registers.
22105 GCC models them as 64 32-bit registers, so we need to describe this to
22106 the DWARF generation code. Other registers can use the default. */
22107 static rtx
22108 arm_dwarf_register_span (rtx rtl)
22109 {
22110 unsigned regno;
22111 int nregs;
22112 int i;
22113 rtx p;
22114
22115 regno = REGNO (rtl);
22116 if (!IS_VFP_REGNUM (regno))
22117 return NULL_RTX;
22118
22119 /* XXX FIXME: The EABI defines two VFP register ranges:
22120 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22121 256-287: D0-D31
22122 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22123 corresponding D register. Until GDB supports this, we shall use the
22124 legacy encodings. We also use these encodings for D0-D15 for
22125 compatibility with older debuggers. */
22126 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22127 return NULL_RTX;
22128
22129 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22130 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22131 regno = (regno - FIRST_VFP_REGNUM) / 2;
22132 for (i = 0; i < nregs; i++)
22133 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22134
22135 return p;
22136 }
22137
22138 #if ARM_UNWIND_INFO
22139 /* Emit unwind directives for a store-multiple instruction or stack pointer
22140 push during alignment.
22141 These should only ever be generated by the function prologue code, so
22142 expect them to have a particular form. */
22143
22144 static void
22145 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22146 {
22147 int i;
22148 HOST_WIDE_INT offset;
22149 HOST_WIDE_INT nregs;
22150 int reg_size;
22151 unsigned reg;
22152 unsigned lastreg;
22153 rtx e;
22154
22155 e = XVECEXP (p, 0, 0);
22156 if (GET_CODE (e) != SET)
22157 abort ();
22158
22159 /* First insn will adjust the stack pointer. */
22160 if (GET_CODE (e) != SET
22161 || GET_CODE (XEXP (e, 0)) != REG
22162 || REGNO (XEXP (e, 0)) != SP_REGNUM
22163 || GET_CODE (XEXP (e, 1)) != PLUS)
22164 abort ();
22165
22166 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22167 nregs = XVECLEN (p, 0) - 1;
22168
22169 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22170 if (reg < 16)
22171 {
22172 /* The function prologue may also push pc, but not annotate it as it is
22173 never restored. We turn this into a stack pointer adjustment. */
22174 if (nregs * 4 == offset - 4)
22175 {
22176 fprintf (asm_out_file, "\t.pad #4\n");
22177 offset -= 4;
22178 }
22179 reg_size = 4;
22180 fprintf (asm_out_file, "\t.save {");
22181 }
22182 else if (IS_VFP_REGNUM (reg))
22183 {
22184 reg_size = 8;
22185 fprintf (asm_out_file, "\t.vsave {");
22186 }
22187 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22188 {
22189 /* FPA registers are done differently. */
22190 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22191 return;
22192 }
22193 else
22194 /* Unknown register type. */
22195 abort ();
22196
22197 /* If the stack increment doesn't match the size of the saved registers,
22198 something has gone horribly wrong. */
22199 if (offset != nregs * reg_size)
22200 abort ();
22201
22202 offset = 0;
22203 lastreg = 0;
22204 /* The remaining insns will describe the stores. */
22205 for (i = 1; i <= nregs; i++)
22206 {
22207 /* Expect (set (mem <addr>) (reg)).
22208 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22209 e = XVECEXP (p, 0, i);
22210 if (GET_CODE (e) != SET
22211 || GET_CODE (XEXP (e, 0)) != MEM
22212 || GET_CODE (XEXP (e, 1)) != REG)
22213 abort ();
22214
22215 reg = REGNO (XEXP (e, 1));
22216 if (reg < lastreg)
22217 abort ();
22218
22219 if (i != 1)
22220 fprintf (asm_out_file, ", ");
22221 /* We can't use %r for vfp because we need to use the
22222 double precision register names. */
22223 if (IS_VFP_REGNUM (reg))
22224 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22225 else
22226 asm_fprintf (asm_out_file, "%r", reg);
22227
22228 #ifdef ENABLE_CHECKING
22229 /* Check that the addresses are consecutive. */
22230 e = XEXP (XEXP (e, 0), 0);
22231 if (GET_CODE (e) == PLUS)
22232 {
22233 offset += reg_size;
22234 if (GET_CODE (XEXP (e, 0)) != REG
22235 || REGNO (XEXP (e, 0)) != SP_REGNUM
22236 || GET_CODE (XEXP (e, 1)) != CONST_INT
22237 || offset != INTVAL (XEXP (e, 1)))
22238 abort ();
22239 }
22240 else if (i != 1
22241 || GET_CODE (e) != REG
22242 || REGNO (e) != SP_REGNUM)
22243 abort ();
22244 #endif
22245 }
22246 fprintf (asm_out_file, "}\n");
22247 }
22248
22249 /* Emit unwind directives for a SET. */
22250
22251 static void
22252 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22253 {
22254 rtx e0;
22255 rtx e1;
22256 unsigned reg;
22257
22258 e0 = XEXP (p, 0);
22259 e1 = XEXP (p, 1);
22260 switch (GET_CODE (e0))
22261 {
22262 case MEM:
22263 /* Pushing a single register. */
22264 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22265 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22266 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22267 abort ();
22268
22269 asm_fprintf (asm_out_file, "\t.save ");
22270 if (IS_VFP_REGNUM (REGNO (e1)))
22271 asm_fprintf(asm_out_file, "{d%d}\n",
22272 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22273 else
22274 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22275 break;
22276
22277 case REG:
22278 if (REGNO (e0) == SP_REGNUM)
22279 {
22280 /* A stack increment. */
22281 if (GET_CODE (e1) != PLUS
22282 || GET_CODE (XEXP (e1, 0)) != REG
22283 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22284 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22285 abort ();
22286
22287 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22288 -INTVAL (XEXP (e1, 1)));
22289 }
22290 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22291 {
22292 HOST_WIDE_INT offset;
22293
22294 if (GET_CODE (e1) == PLUS)
22295 {
22296 if (GET_CODE (XEXP (e1, 0)) != REG
22297 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22298 abort ();
22299 reg = REGNO (XEXP (e1, 0));
22300 offset = INTVAL (XEXP (e1, 1));
22301 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22302 HARD_FRAME_POINTER_REGNUM, reg,
22303 offset);
22304 }
22305 else if (GET_CODE (e1) == REG)
22306 {
22307 reg = REGNO (e1);
22308 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22309 HARD_FRAME_POINTER_REGNUM, reg);
22310 }
22311 else
22312 abort ();
22313 }
22314 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22315 {
22316 /* Move from sp to reg. */
22317 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22318 }
22319 else if (GET_CODE (e1) == PLUS
22320 && GET_CODE (XEXP (e1, 0)) == REG
22321 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22322 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22323 {
22324 /* Set reg to offset from sp. */
22325 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22326 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22327 }
22328 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22329 {
22330 /* Stack pointer save before alignment. */
22331 reg = REGNO (e0);
22332 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22333 reg + 0x90, reg);
22334 }
22335 else
22336 abort ();
22337 break;
22338
22339 default:
22340 abort ();
22341 }
22342 }
22343
22344
22345 /* Emit unwind directives for the given insn. */
22346
22347 static void
22348 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22349 {
22350 rtx pat;
22351
22352 if (arm_except_unwind_info () != UI_TARGET)
22353 return;
22354
22355 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22356 && (TREE_NOTHROW (current_function_decl)
22357 || crtl->all_throwers_are_sibcalls))
22358 return;
22359
22360 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22361 return;
22362
22363 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22364 if (pat)
22365 pat = XEXP (pat, 0);
22366 else
22367 pat = PATTERN (insn);
22368
22369 switch (GET_CODE (pat))
22370 {
22371 case SET:
22372 arm_unwind_emit_set (asm_out_file, pat);
22373 break;
22374
22375 case SEQUENCE:
22376 /* Store multiple. */
22377 arm_unwind_emit_sequence (asm_out_file, pat);
22378 break;
22379
22380 default:
22381 abort();
22382 }
22383 }
22384
22385
22386 /* Output a reference from a function exception table to the type_info
22387 object X. The EABI specifies that the symbol should be relocated by
22388 an R_ARM_TARGET2 relocation. */
22389
22390 static bool
22391 arm_output_ttype (rtx x)
22392 {
22393 fputs ("\t.word\t", asm_out_file);
22394 output_addr_const (asm_out_file, x);
22395 /* Use special relocations for symbol references. */
22396 if (GET_CODE (x) != CONST_INT)
22397 fputs ("(TARGET2)", asm_out_file);
22398 fputc ('\n', asm_out_file);
22399
22400 return TRUE;
22401 }
22402
22403 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22404
22405 static void
22406 arm_asm_emit_except_personality (rtx personality)
22407 {
22408 fputs ("\t.personality\t", asm_out_file);
22409 output_addr_const (asm_out_file, personality);
22410 fputc ('\n', asm_out_file);
22411 }
22412
22413 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22414
22415 static void
22416 arm_asm_init_sections (void)
22417 {
22418 exception_section = get_unnamed_section (0, output_section_asm_op,
22419 "\t.handlerdata");
22420 }
22421 #endif /* ARM_UNWIND_INFO */
22422
22423 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22424
22425 static enum unwind_info_type
22426 arm_except_unwind_info (void)
22427 {
22428 /* Honor the --enable-sjlj-exceptions configure switch. */
22429 #ifdef CONFIG_SJLJ_EXCEPTIONS
22430 if (CONFIG_SJLJ_EXCEPTIONS)
22431 return UI_SJLJ;
22432 #endif
22433
22434 /* If not using ARM EABI unwind tables... */
22435 if (ARM_UNWIND_INFO)
22436 {
22437 /* For simplicity elsewhere in this file, indicate that all unwind
22438 info is disabled if we're not emitting unwind tables. */
22439 if (!flag_exceptions && !flag_unwind_tables)
22440 return UI_NONE;
22441 else
22442 return UI_TARGET;
22443 }
22444
22445 /* ... we use sjlj exceptions for backwards compatibility. */
22446 return UI_SJLJ;
22447 }
22448
22449
22450 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22451 stack alignment. */
22452
22453 static void
22454 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22455 {
22456 rtx unspec = SET_SRC (pattern);
22457 gcc_assert (GET_CODE (unspec) == UNSPEC);
22458
22459 switch (index)
22460 {
22461 case UNSPEC_STACK_ALIGN:
22462 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22463 put anything on the stack, so hopefully it won't matter.
22464 CFA = SP will be correct after alignment. */
22465 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22466 SET_DEST (pattern));
22467 break;
22468 default:
22469 gcc_unreachable ();
22470 }
22471 }
22472
22473
22474 /* Output unwind directives for the start/end of a function. */
22475
22476 void
22477 arm_output_fn_unwind (FILE * f, bool prologue)
22478 {
22479 if (arm_except_unwind_info () != UI_TARGET)
22480 return;
22481
22482 if (prologue)
22483 fputs ("\t.fnstart\n", f);
22484 else
22485 {
22486 /* If this function will never be unwound, then mark it as such.
22487 The same condition is used in arm_unwind_emit to suppress
22488 the frame annotations. */
22489 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22490 && (TREE_NOTHROW (current_function_decl)
22491 || crtl->all_throwers_are_sibcalls))
22492 fputs("\t.cantunwind\n", f);
22493
22494 fputs ("\t.fnend\n", f);
22495 }
22496 }
22497
22498 static bool
22499 arm_emit_tls_decoration (FILE *fp, rtx x)
22500 {
22501 enum tls_reloc reloc;
22502 rtx val;
22503
22504 val = XVECEXP (x, 0, 0);
22505 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22506
22507 output_addr_const (fp, val);
22508
22509 switch (reloc)
22510 {
22511 case TLS_GD32:
22512 fputs ("(tlsgd)", fp);
22513 break;
22514 case TLS_LDM32:
22515 fputs ("(tlsldm)", fp);
22516 break;
22517 case TLS_LDO32:
22518 fputs ("(tlsldo)", fp);
22519 break;
22520 case TLS_IE32:
22521 fputs ("(gottpoff)", fp);
22522 break;
22523 case TLS_LE32:
22524 fputs ("(tpoff)", fp);
22525 break;
22526 default:
22527 gcc_unreachable ();
22528 }
22529
22530 switch (reloc)
22531 {
22532 case TLS_GD32:
22533 case TLS_LDM32:
22534 case TLS_IE32:
22535 fputs (" + (. - ", fp);
22536 output_addr_const (fp, XVECEXP (x, 0, 2));
22537 fputs (" - ", fp);
22538 output_addr_const (fp, XVECEXP (x, 0, 3));
22539 fputc (')', fp);
22540 break;
22541 default:
22542 break;
22543 }
22544
22545 return TRUE;
22546 }
22547
22548 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22549
22550 static void
22551 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22552 {
22553 gcc_assert (size == 4);
22554 fputs ("\t.word\t", file);
22555 output_addr_const (file, x);
22556 fputs ("(tlsldo)", file);
22557 }
22558
22559 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22560
22561 static bool
22562 arm_output_addr_const_extra (FILE *fp, rtx x)
22563 {
22564 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22565 return arm_emit_tls_decoration (fp, x);
22566 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22567 {
22568 char label[256];
22569 int labelno = INTVAL (XVECEXP (x, 0, 0));
22570
22571 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22572 assemble_name_raw (fp, label);
22573
22574 return TRUE;
22575 }
22576 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22577 {
22578 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22579 if (GOT_PCREL)
22580 fputs ("+.", fp);
22581 fputs ("-(", fp);
22582 output_addr_const (fp, XVECEXP (x, 0, 0));
22583 fputc (')', fp);
22584 return TRUE;
22585 }
22586 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22587 {
22588 output_addr_const (fp, XVECEXP (x, 0, 0));
22589 if (GOT_PCREL)
22590 fputs ("+.", fp);
22591 fputs ("-(", fp);
22592 output_addr_const (fp, XVECEXP (x, 0, 1));
22593 fputc (')', fp);
22594 return TRUE;
22595 }
22596 else if (GET_CODE (x) == CONST_VECTOR)
22597 return arm_emit_vector_const (fp, x);
22598
22599 return FALSE;
22600 }
22601
22602 /* Output assembly for a shift instruction.
22603 SET_FLAGS determines how the instruction modifies the condition codes.
22604 0 - Do not set condition codes.
22605 1 - Set condition codes.
22606 2 - Use smallest instruction. */
22607 const char *
22608 arm_output_shift(rtx * operands, int set_flags)
22609 {
22610 char pattern[100];
22611 static const char flag_chars[3] = {'?', '.', '!'};
22612 const char *shift;
22613 HOST_WIDE_INT val;
22614 char c;
22615
22616 c = flag_chars[set_flags];
22617 if (TARGET_UNIFIED_ASM)
22618 {
22619 shift = shift_op(operands[3], &val);
22620 if (shift)
22621 {
22622 if (val != -1)
22623 operands[2] = GEN_INT(val);
22624 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22625 }
22626 else
22627 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22628 }
22629 else
22630 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22631 output_asm_insn (pattern, operands);
22632 return "";
22633 }
22634
22635 /* Output a Thumb-1 casesi dispatch sequence. */
22636 const char *
22637 thumb1_output_casesi (rtx *operands)
22638 {
22639 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22640
22641 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22642
22643 switch (GET_MODE(diff_vec))
22644 {
22645 case QImode:
22646 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22647 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22648 case HImode:
22649 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22650 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22651 case SImode:
22652 return "bl\t%___gnu_thumb1_case_si";
22653 default:
22654 gcc_unreachable ();
22655 }
22656 }
22657
22658 /* Output a Thumb-2 casesi instruction. */
22659 const char *
22660 thumb2_output_casesi (rtx *operands)
22661 {
22662 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22663
22664 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22665
22666 output_asm_insn ("cmp\t%0, %1", operands);
22667 output_asm_insn ("bhi\t%l3", operands);
22668 switch (GET_MODE(diff_vec))
22669 {
22670 case QImode:
22671 return "tbb\t[%|pc, %0]";
22672 case HImode:
22673 return "tbh\t[%|pc, %0, lsl #1]";
22674 case SImode:
22675 if (flag_pic)
22676 {
22677 output_asm_insn ("adr\t%4, %l2", operands);
22678 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22679 output_asm_insn ("add\t%4, %4, %5", operands);
22680 return "bx\t%4";
22681 }
22682 else
22683 {
22684 output_asm_insn ("adr\t%4, %l2", operands);
22685 return "ldr\t%|pc, [%4, %0, lsl #2]";
22686 }
22687 default:
22688 gcc_unreachable ();
22689 }
22690 }
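/* By way of illustration (registers and labels are hypothetical), the
   sequence produced above for a HImode dispatch table is:

	cmp	r0, r1
	bhi	.Ldefault
	tbh	[pc, r0, lsl #1]

   whereas the SImode -fpic variant loads the table entry into a
   scratch register and adds it to the table base before branching.  */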
22691
22692 /* Most ARM cores are single issue, but some newer ones can dual issue.
22693 The scheduler descriptions rely on this being correct. */
22694 static int
22695 arm_issue_rate (void)
22696 {
22697 switch (arm_tune)
22698 {
22699 case cortexr4:
22700 case cortexr4f:
22701 case cortexa5:
22702 case cortexa8:
22703 case cortexa9:
22704 return 2;
22705
22706 default:
22707 return 1;
22708 }
22709 }
22710
22711 /* A table and a function to perform ARM-specific name mangling for
22712 NEON vector types in order to conform to the AAPCS (see "Procedure
22713 Call Standard for the ARM Architecture", Appendix A). To qualify
22714 for emission with the mangled names defined in that document, a
22715 vector type must not only be of the correct mode but also be
22716 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22717 typedef struct
22718 {
22719 enum machine_mode mode;
22720 const char *element_type_name;
22721 const char *aapcs_name;
22722 } arm_mangle_map_entry;
22723
22724 static arm_mangle_map_entry arm_mangle_map[] = {
22725 /* 64-bit containerized types. */
22726 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22727 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22728 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22729 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22730 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22731 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22732 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22733 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22734 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22735 /* 128-bit containerized types. */
22736 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22737 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22738 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22739 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22740 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22741 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22742 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22743 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22744 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22745 { VOIDmode, NULL, NULL }
22746 };
22747
22748 const char *
22749 arm_mangle_type (const_tree type)
22750 {
22751 arm_mangle_map_entry *pos = arm_mangle_map;
22752
22753 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22754 has to be mangled as if it is in the "std" namespace. */
22755 if (TARGET_AAPCS_BASED
22756 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22757 {
22758 static bool warned;
22759 if (!warned && warn_psabi && !in_system_header)
22760 {
22761 warned = true;
22762 inform (input_location,
22763 "the mangling of %<va_list%> has changed in GCC 4.4");
22764 }
22765 return "St9__va_list";
22766 }
22767
22768 /* Half-precision float. */
22769 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22770 return "Dh";
22771
22772 if (TREE_CODE (type) != VECTOR_TYPE)
22773 return NULL;
22774
22775 /* Check the mode of the vector type, and the name of the vector
22776 element type, against the table. */
22777 while (pos->mode != VOIDmode)
22778 {
22779 tree elt_type = TREE_TYPE (type);
22780
22781 if (pos->mode == TYPE_MODE (type)
22782 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22783 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22784 pos->element_type_name))
22785 return pos->aapcs_name;
22786
22787 pos++;
22788 }
22789
22790 /* Use the default mangling for unrecognized (possibly user-defined)
22791 vector types. */
22792 return NULL;
22793 }
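/* Example: a 64-bit vector of eight "__builtin_neon_qi" elements has
   mode V8QImode and is therefore mangled as "15__simd64_int8_t" per
   the table above, while a user-defined vector of eight plain
   "signed char" elements falls through to the default mangling.  */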
22794
22795 /* Order of allocation of core registers for Thumb: this allocation is
22796 written over the corresponding initial entries of the array
22797 initialized with REG_ALLOC_ORDER. We allocate all low registers
22798 first. Saving and restoring a low register is usually cheaper than
22799 using a call-clobbered high register. */
22800
22801 static const int thumb_core_reg_alloc_order[] =
22802 {
22803 3, 2, 1, 0, 4, 5, 6, 7,
22804 14, 12, 8, 9, 10, 11, 13, 15
22805 };
22806
22807 /* Adjust register allocation order when compiling for Thumb. */
22808
22809 void
22810 arm_order_regs_for_local_alloc (void)
22811 {
22812 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22813 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22814 if (TARGET_THUMB)
22815 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22816 sizeof (thumb_core_reg_alloc_order));
22817 }
22818
22819 /* Set default optimization options. */
22820 static void
22821 arm_option_optimization (int level, int size ATTRIBUTE_UNUSED)
22822 {
22823 /* Enable section anchors by default at -O1 or higher.
22824 Use 2 to distinguish from an explicit -fsection-anchors
22825 given on the command line. */
22826 if (level > 0)
22827 flag_section_anchors = 2;
22828 }
22829
22830 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22831
22832 bool
22833 arm_frame_pointer_required (void)
22834 {
22835 return (cfun->has_nonlocal_label
22836 || SUBTARGET_FRAME_POINTER_REQUIRED
22837 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22838 }
22839
22840 /* Thumb-1 is the only target without conditional execution, so return
22841 true if the target is not Thumb-1. */
22842 static bool
22843 arm_have_conditional_execution (void)
22844 {
22845 return !TARGET_THUMB1;
22846 }
22847
22848 /* Legitimize a memory reference for sync primitive implemented using
22849 ldrex / strex. We currently force the form of the reference to be
22850 indirect without offset. We do not yet support the indirect offset
22851 addressing supported by some ARM targets for these
22852 instructions. */
22853 static rtx
22854 arm_legitimize_sync_memory (rtx memory)
22855 {
22856 rtx addr = force_reg (Pmode, XEXP (memory, 0));
22857 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
22858
22859 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
22860 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
22861 return legitimate_memory;
22862 }
22863
22864 /* An instruction emitter. */
22865 typedef void (* emit_f) (int label, const char *, rtx *);
22866
22867 /* An instruction emitter that emits via the conventional
22868 output_asm_insn. */
22869 static void
22870 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
22871 {
22872 output_asm_insn (pattern, operands);
22873 }
22874
22875 /* Count the number of emitted synchronization instructions. */
22876 static unsigned arm_insn_count;
22877
22878 /* An emitter that counts emitted instructions but does not actually
22879 emit them into the instruction stream. */
22880 static void
22881 arm_count (int label,
22882 const char *pattern ATTRIBUTE_UNUSED,
22883 rtx *operands ATTRIBUTE_UNUSED)
22884 {
22885 if (! label)
22886 ++ arm_insn_count;
22887 }
22888
22889 /* Construct a pattern using conventional output formatting and feed
22890 it to output_asm_insn. Provides a mechanism to construct the
22891 output pattern on the fly. Note the hard limit on the pattern
22892 buffer size. */
22893 static void
22894 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
22895 const char *pattern, ...)
22896 {
22897 va_list ap;
22898 char buffer[256];
22899
22900 va_start (ap, pattern);
22901 vsprintf (buffer, pattern, ap);
22902 va_end (ap);
22903 emit (label, buffer, operands);
22904 }
22905
22906 /* Emit the memory barrier instruction, if any, provided by this
22907 target to a specified emitter. */
22908 static void
22909 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
22910 {
22911 if (TARGET_HAVE_DMB)
22912 {
22913 /* Note we issue a system level barrier. We should consider
22914 issuing an inner shareability zone barrier here instead, i.e.
22915 "DMB ISH". */
22916 emit (0, "dmb\tsy", operands);
22917 return;
22918 }
22919
22920 if (TARGET_HAVE_DMB_MCR)
22921 {
22922 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
22923 return;
22924 }
22925
22926 gcc_unreachable ();
22927 }
22928
22929 /* Emit the memory barrier instruction, if any, provided by this
22930 target. */
22931 const char *
22932 arm_output_memory_barrier (rtx *operands)
22933 {
22934 arm_process_output_memory_barrier (arm_emit, operands);
22935 return "";
22936 }
22937
22938 /* Helper to figure out the instruction suffix required on ldrex/strex
22939 for operations on an object of the specified mode. */
22940 static const char *
22941 arm_ldrex_suffix (enum machine_mode mode)
22942 {
22943 switch (mode)
22944 {
22945 case QImode: return "b";
22946 case HImode: return "h";
22947 case SImode: return "";
22948 case DImode: return "d";
22949 default:
22950 gcc_unreachable ();
22951 }
22952 return "";
22953 }
22954
22955 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
22956 mode. */
22957 static void
22958 arm_output_ldrex (emit_f emit,
22959 enum machine_mode mode,
22960 rtx target,
22961 rtx memory)
22962 {
22963 const char *suffix = arm_ldrex_suffix (mode);
22964 rtx operands[2];
22965
22966 operands[0] = target;
22967 operands[1] = memory;
22968 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
22969 }
22970
22971 /* Emit a strex{b,h,d, } instruction appropriate for the specified
22972 mode. */
22973 static void
22974 arm_output_strex (emit_f emit,
22975 enum machine_mode mode,
22976 const char *cc,
22977 rtx result,
22978 rtx value,
22979 rtx memory)
22980 {
22981 const char *suffix = arm_ldrex_suffix (mode);
22982 rtx operands[3];
22983
22984 operands[0] = result;
22985 operands[1] = value;
22986 operands[2] = memory;
22987 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
22988 cc);
22989 }
22990
22991 /* Helper to emit a two operand instruction. */
22992 static void
22993 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
22994 {
22995 rtx operands[2];
22996
22997 operands[0] = d;
22998 operands[1] = s;
22999 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23000 }
23001
23002 /* Helper to emit a three operand instruction. */
23003 static void
23004 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23005 {
23006 rtx operands[3];
23007
23008 operands[0] = d;
23009 operands[1] = a;
23010 operands[2] = b;
23011 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23012 }
23013
23014 /* Emit a load store exclusive synchronization loop.
23015
23016 do
23017 old_value = [mem]
23018 if old_value != required_value
23019 break;
23020 t1 = sync_op (old_value, new_value)
23021 [mem] = t1, t2 = [0|1]
23022 while ! t2
23023
23024 Note:
23025 t1 == t2 is not permitted
23026 t1 == old_value is permitted
23027
23028 required_value:
23029
23030 RTX register or const_int representing the required old_value for
23031 the modify to continue; if NULL, no comparison is performed. (An example of the emitted sequence follows the function below.) */
23032 static void
23033 arm_output_sync_loop (emit_f emit,
23034 enum machine_mode mode,
23035 rtx old_value,
23036 rtx memory,
23037 rtx required_value,
23038 rtx new_value,
23039 rtx t1,
23040 rtx t2,
23041 enum attr_sync_op sync_op,
23042 int early_barrier_required)
23043 {
23044 rtx operands[1];
23045
23046 gcc_assert (t1 != t2);
23047
23048 if (early_barrier_required)
23049 arm_process_output_memory_barrier (emit, NULL);
23050
23051 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23052
23053 arm_output_ldrex (emit, mode, old_value, memory);
23054
23055 if (required_value)
23056 {
23057 rtx operands[2];
23058
23059 operands[0] = old_value;
23060 operands[1] = required_value;
23061 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23062 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23063 }
23064
23065 switch (sync_op)
23066 {
23067 case SYNC_OP_ADD:
23068 arm_output_op3 (emit, "add", t1, old_value, new_value);
23069 break;
23070
23071 case SYNC_OP_SUB:
23072 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23073 break;
23074
23075 case SYNC_OP_IOR:
23076 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23077 break;
23078
23079 case SYNC_OP_XOR:
23080 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23081 break;
23082
23083 case SYNC_OP_AND:
23084 arm_output_op3 (emit,"and", t1, old_value, new_value);
23085 break;
23086
23087 case SYNC_OP_NAND:
23088 arm_output_op3 (emit, "and", t1, old_value, new_value);
23089 arm_output_op2 (emit, "mvn", t1, t1);
23090 break;
23091
23092 case SYNC_OP_NONE:
23093 t1 = new_value;
23094 break;
23095 }
23096
23097 arm_output_strex (emit, mode, "", t2, t1, memory);
23098 operands[0] = t2;
23099 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23100 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
23101
23102 arm_process_output_memory_barrier (emit, NULL);
23103 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23104 }
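/* As a rough illustration (using hypothetical registers and numeric
   local labels in place of the LSYT/LSYB labels), a compare-and-swap
   style loop on an SImode object emitted by the routine above looks
   like:

	dmb	sy
   1:	ldrex	r0, [r3]
	cmp	r0, r1
	bne	2f
	strex	r2, r4, [r3]
	teq	r2, #0
	bne	1b
	dmb	sy
   2:

   where r1 holds the required old value and r4 the new value.  */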
23105
23106 static rtx
23107 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23108 {
23109 if (index > 0)
23110 default_value = operands[index - 1];
23111
23112 return default_value;
23113 }
23114
23115 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23116 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23117
23118 /* Extract the operands for a synchronization instruction from the
23119 instruction's attributes and emit the instruction. */
23120 static void
23121 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23122 {
23123 rtx result, memory, required_value, new_value, t1, t2;
23124 int early_barrier;
23125 enum machine_mode mode;
23126 enum attr_sync_op sync_op;
23127
23128 result = FETCH_SYNC_OPERAND(result, 0);
23129 memory = FETCH_SYNC_OPERAND(memory, 0);
23130 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23131 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23132 t1 = FETCH_SYNC_OPERAND(t1, 0);
23133 t2 = FETCH_SYNC_OPERAND(t2, 0);
23134 early_barrier =
23135 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23136 sync_op = get_attr_sync_op (insn);
23137 mode = GET_MODE (memory);
23138
23139 arm_output_sync_loop (emit, mode, result, memory, required_value,
23140 new_value, t1, t2, sync_op, early_barrier);
23141 }
23142
23143 /* Emit a synchronization instruction loop. */
23144 const char *
23145 arm_output_sync_insn (rtx insn, rtx *operands)
23146 {
23147 arm_process_output_sync_insn (arm_emit, insn, operands);
23148 return "";
23149 }
23150
23151 /* Count the number of machine instructions that will be emitted for a
23152 synchronization instruction. Note that the emitter used does not
23153 emit instructions; it just counts them, being careful not to count
23154 labels. */
23155 unsigned int
23156 arm_sync_loop_insns (rtx insn, rtx *operands)
23157 {
23158 arm_insn_count = 0;
23159 arm_process_output_sync_insn (arm_count, insn, operands);
23160 return arm_insn_count;
23161 }
23162
23163 /* Helper to call a target sync instruction generator, dealing with
23164 the variation in operands required by the different generators. */
23165 static rtx
23166 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23167 rtx memory, rtx required_value, rtx new_value)
23168 {
23169 switch (generator->op)
23170 {
23171 case arm_sync_generator_omn:
23172 gcc_assert (! required_value);
23173 return generator->u.omn (old_value, memory, new_value);
23174
23175 case arm_sync_generator_omrn:
23176 gcc_assert (required_value);
23177 return generator->u.omrn (old_value, memory, required_value, new_value);
23178 }
23179
23180 return NULL;
23181 }
23182
23183 /* Expand a synchronization loop. The synchronization loop is expanded
23184 as an opaque block of instructions in order to ensure that we do
23185 not subsequently get extraneous memory accesses inserted within the
23186 critical region. The exclusive access property of ldrex/strex is
23187 only guaranteed if there are no intervening memory accesses. */
23188 void
23189 arm_expand_sync (enum machine_mode mode,
23190 struct arm_sync_generator *generator,
23191 rtx target, rtx memory, rtx required_value, rtx new_value)
23192 {
23193 if (target == NULL)
23194 target = gen_reg_rtx (mode);
23195
23196 memory = arm_legitimize_sync_memory (memory);
23197 if (mode != SImode)
23198 {
23199 rtx load_temp = gen_reg_rtx (SImode);
23200
23201 if (required_value)
23202 required_value = convert_modes (SImode, mode, required_value, true);
23203
23204 new_value = convert_modes (SImode, mode, new_value, true);
23205 emit_insn (arm_call_generator (generator, load_temp, memory,
23206 required_value, new_value));
23207 emit_move_insn (target, gen_lowpart (mode, load_temp));
23208 }
23209 else
23210 {
23211 emit_insn (arm_call_generator (generator, target, memory, required_value,
23212 new_value));
23213 }
23214 }
23215
23216 static bool
23217 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23218 {
23219 /* Vectors which aren't in packed structures will not be less aligned than
23220 the natural alignment of their element type, so this is safe. */
23221 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23222 return !is_packed;
23223
23224 return default_builtin_vector_alignment_reachable (type, is_packed);
23225 }
23226
23227 static bool
23228 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23229 const_tree type, int misalignment,
23230 bool is_packed)
23231 {
23232 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23233 {
23234 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23235
23236 if (is_packed)
23237 return align == 1;
23238
23239 /* If the misalignment is unknown, we should be able to handle the access
23240 so long as it is not to a member of a packed data structure. */
23241 if (misalignment == -1)
23242 return true;
23243
23244 /* Return true if the misalignment is a multiple of the natural alignment
23245 of the vector's element type. This is probably always going to be
23246 true in practice, since we've already established that this isn't a
23247 packed access. */
23248 return ((misalignment % align) == 0);
23249 }
23250
23251 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23252 is_packed);
23253 }
23254
23255 #include "gt-arm.h"